com.google.cloud.dataflow.sdk.coders.Coder Java Examples

The following examples show how to use com.google.cloud.dataflow.sdk.coders.Coder from the Google Cloud Dataflow SDK. They are drawn from open-source projects; the source file, project, and license are listed above each example.
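
Before the numbered examples, here is a minimal round-trip sketch of the encode/decode pattern that most of the snippets below build on. It is illustrative rather than taken from the projects: it assumes the Dataflow SDK 1.x is on the classpath, and the class name CoderRoundTrip is ours. Coder.Context.OUTER marks a value that occupies the whole stream, while Context.NESTED is used when the value is embedded in a larger record (as in Examples #25 and #26).

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class CoderRoundTrip {
	public static void main(String[] args) throws Exception {
		Coder<String> coder = StringUtf8Coder.of();

		// Encode the value; in the OUTER context the value is the entire
		// content of the stream, so no length prefix is written.
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		coder.encode("hello", out, Coder.Context.OUTER);

		// Decode it back using the same context.
		String decoded = coder.decode(
				new ByteArrayInputStream(out.toByteArray()), Coder.Context.OUTER);
		System.out.println(decoded); // prints "hello"
	}
}
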
Example #1
Source File: StateCheckpointUtils.java    From flink-dataflow with Apache License 2.0
public static <K> void encodeState(Map<K, FlinkStateInternals<K>> perKeyStateInternals,
						 StateCheckpointWriter writer, Coder<K> keyCoder) throws IOException {
	CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);

	int noOfKeys = perKeyStateInternals.size();
	writer.writeInt(noOfKeys);
	for (Map.Entry<K, FlinkStateInternals<K>> keyStatePair : perKeyStateInternals.entrySet()) {
		K key = keyStatePair.getKey();
		FlinkStateInternals<K> state = keyStatePair.getValue();

		// encode the key
		writer.serializeKey(key, keySerializer);

		// write the associated state
		state.persistState(writer);
	}
}
 
Example #2
Source File: KvCoderComperator.java    From flink-dataflow with Apache License 2.0
@Override
public void putNormalizedKey(KV<K, V> record, MemorySegment target, int offset, int numBytes) {
	buffer1.reset();
	try {
		keyCoder.encode(record.getKey(), buffer1, Coder.Context.NESTED);
	} catch (IOException e) {
		throw new RuntimeException("Could not serialize " + record + " using coder " + keyCoder, e);
	}
	final byte[] data = buffer1.getBuffer();
	final int limit = offset + numBytes;

	int numBytesPut = Math.min(numBytes, buffer1.size());

	target.put(offset, data, 0, numBytesPut);

	offset += numBytesPut;

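	// If the encoded key was shorter than numBytes, zero-pad the remaining
	// slot so every normalized key has a fixed width.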
	while (offset < limit) {
		target.put(offset++, (byte) 0);
	}
}
 
Example #3
Source File: KvCoderComperator.java    From flink-dataflow with Apache License 2.0
@Override
public int compare(KV<K, V> first, KV<K, V> second) {
	try {
		buffer1.reset();
		buffer2.reset();
		keyCoder.encode(first.getKey(), buffer1, Coder.Context.OUTER);
		keyCoder.encode(second.getKey(), buffer2, Coder.Context.OUTER);
		byte[] arr = buffer1.getBuffer();
		byte[] arrOther = buffer2.getBuffer();
		if (buffer1.size() != buffer2.size()) {
			return buffer1.size() - buffer2.size();
		}
		int len = buffer1.size();
		for (int i = 0; i < len; i++) {
			if (arr[i] != arrOther[i]) {
				return arr[i] - arrOther[i];
			}
		}
		return 0;
	} catch (IOException e) {
		throw new RuntimeException("Could not compare reference.", e);
	}
}
 
Example #4
Source File: KvCoderComperator.java    From flink-dataflow with Apache License 2.0
@Override
public boolean equalToReference(KV<K, V> candidate) {
	try {
		buffer2.reset();
		keyCoder.encode(candidate.getKey(), buffer2, Coder.Context.OUTER);
		byte[] arr = referenceBuffer.getBuffer();
		byte[] arrOther = buffer2.getBuffer();
		if (referenceBuffer.size() != buffer2.size()) {
			return false;
		}
		int len = buffer2.size();
		for (int i = 0; i < len; i++) {
			if (arr[i] != arrOther[i]) {
				return false;
			}
		}
		return true;
	} catch (IOException e) {
		throw new RuntimeException("Could not compare reference.", e);
	}
}
 
Example #5
Source File: FlinkStreamingCreateFunction.java    From flink-dataflow with Apache License 2.0
@Override
public void flatMap(IN value, Collector<WindowedValue<OUT>> out) throws Exception {

	@SuppressWarnings("unchecked")
	OUT voidValue = (OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE;
	for (byte[] element : elements) {
		ByteArrayInputStream bai = new ByteArrayInputStream(element);
		OUT outValue = coder.decode(bai, Coder.Context.OUTER);

		if (outValue == null) {
			out.collect(WindowedValue.of(voidValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
		} else {
			out.collect(WindowedValue.of(outValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
		}
	}

	out.close();
}
 
Example #6
Source File: CoderComparator.java    From flink-dataflow with Apache License 2.0
@Override
public int compare(T first, T second) {
	try {
		buffer1.reset();
		buffer2.reset();
		coder.encode(first, buffer1, Coder.Context.OUTER);
		coder.encode(second, buffer2, Coder.Context.OUTER);
		byte[] arr = buffer1.getBuffer();
		byte[] arrOther = buffer2.getBuffer();
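		// A shorter encoding sorts first; equal-length encodings are
		// compared byte-by-byte.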
		if (buffer1.size() != buffer2.size()) {
			return buffer1.size() - buffer2.size();
		}
		int len = buffer1.size();
		for (int i = 0; i < len; i++) {
			if (arr[i] != arrOther[i]) {
				return arr[i] - arrOther[i];
			}
		}
		return 0;
	} catch (IOException e) {
		throw new RuntimeException("Could not compare: ", e);
	}
}
 
Example #7
Source File: CoderComparator.java    From flink-dataflow with Apache License 2.0
@Override
public boolean equalToReference(T candidate) {
	try {
		buffer2.reset();
		coder.encode(candidate, buffer2, Coder.Context.OUTER);
		byte[] arr = referenceBuffer.getBuffer();
		byte[] arrOther = buffer2.getBuffer();
		if (referenceBuffer.size() != buffer2.size()) {
			return false;
		}
		int len = buffer2.size();
		for (int i = 0; i < len; i++) {
			if (arr[i] != arrOther[i]) {
				return false;
			}
		}
		return true;
	} catch (IOException e) {
		throw new RuntimeException("Could not compare reference.", e);
	}
}
 
Example #8
Source File: CoderComparator.java    From flink-dataflow with Apache License 2.0
@Override
public void putNormalizedKey(T record, MemorySegment target, int offset, int numBytes) {
	buffer1.reset();
	try {
		coder.encode(record, buffer1, Coder.Context.OUTER);
	} catch (IOException e) {
		throw new RuntimeException("Could not serialize " + record + " using coder " + coder, e);
	}
	final byte[] data = buffer1.getBuffer();
	final int limit = offset + numBytes;

	target.put(offset, data, 0, Math.min(numBytes, buffer1.size()));

	offset += buffer1.size();

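	// Zero-pad up to the limit when the encoding did not fill the full
	// numBytes slot.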
	while (offset < limit) {
		target.put(offset++, (byte) 0);
	}
}
 
Example #9
Source File: FlinkGroupByKeyWrapper.java    From flink-dataflow with Apache License 2.0
public static <K, V> KeyedStream<WindowedValue<KV<K, V>>, K> groupStreamByKey(DataStream<WindowedValue<KV<K, V>>> inputDataStream, KvCoder<K, V> inputKvCoder) {
	final Coder<K> keyCoder = inputKvCoder.getKeyCoder();
	final TypeInformation<K> keyTypeInfo = new CoderTypeInformation<>(keyCoder);
	final boolean isKeyVoid = keyCoder instanceof VoidCoder;

	return inputDataStream.keyBy(
			new KeySelectorWithQueryableResultType<K, V>() {

				@Override
				public K getKey(WindowedValue<KV<K, V>> value) throws Exception {
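					// Flink does not accept null keys, so a Void key is
					// mapped to a singleton placeholder value.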
					return isKeyVoid ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE :
							value.getValue().getKey();
				}

				@Override
				public TypeInformation<K> getProducedType() {
					return keyTypeInfo;
				}
			});
}
 
Example #10
Source File: StateCheckpointUtils.java    From flink-dataflow with Apache License 2.0
public static <K> Map<K, FlinkStateInternals<K>> decodeState(
		StateCheckpointReader reader,
		OutputTimeFn<? super BoundedWindow> outputTimeFn,
		Coder<K> keyCoder,
		Coder<? extends BoundedWindow> windowCoder,
		ClassLoader classLoader) throws IOException, ClassNotFoundException {

	int noOfKeys = reader.getInt();
	Map<K, FlinkStateInternals<K>> perKeyStateInternals = new HashMap<>(noOfKeys);

	CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);
	for (int i = 0; i < noOfKeys; i++) {

		// decode the key.
		K key = reader.deserializeKey(keySerializer);

		// decode the state associated with the key.
		FlinkStateInternals<K> stateForKey =
				new FlinkStateInternals<>(key, keyCoder, windowCoder, outputTimeFn);
		stateForKey.restoreState(reader, classLoader);
		perKeyStateInternals.put(key, stateForKey);
	}
	return perKeyStateInternals;
}
 
Example #11
Source File: StateCheckpointUtils.java    From flink-dataflow with Apache License 2.0
public static <K> void encodeTimers(Map<K, Set<TimerInternals.TimerData>> allTimers,
						  StateCheckpointWriter writer,
						  Coder<K> keyCoder) throws IOException {
	CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);

	int noOfKeys = allTimers.size();
	writer.writeInt(noOfKeys);
	for (Map.Entry<K, Set<TimerInternals.TimerData>> timersPerKey : allTimers.entrySet()) {
		K key = timersPerKey.getKey();

		// encode the key
		writer.serializeKey(key, keySerializer);

		// write the associated timers
		Set<TimerInternals.TimerData> timers = timersPerKey.getValue();
		encodeTimerDataForKey(writer, timers);
	}
}
 
Example #12
Source File: StateCheckpointUtils.java    From flink-dataflow with Apache License 2.0
public static <K> Map<K, Set<TimerInternals.TimerData>> decodeTimers(
		StateCheckpointReader reader,
		Coder<? extends BoundedWindow> windowCoder,
		Coder<K> keyCoder) throws IOException {

	int noOfKeys = reader.getInt();
	Map<K, Set<TimerInternals.TimerData>> activeTimers = new HashMap<>(noOfKeys);

	CoderTypeSerializer<K> keySerializer = new CoderTypeSerializer<>(keyCoder);
	for (int i = 0; i < noOfKeys; i++) {

		// decode the key.
		K key = reader.deserializeKey(keySerializer);

		// decode the associated timers.
		Set<TimerInternals.TimerData> timers = decodeTimerDataForKey(reader, windowCoder);
		activeTimers.put(key, timers);
	}
	return activeTimers;
}
 
Example #13
Source File: StateCheckpointUtils.java    From flink-dataflow with Apache License 2.0
private static Set<TimerInternals.TimerData> decodeTimerDataForKey(
		StateCheckpointReader reader, Coder<? extends BoundedWindow> windowCoder) throws IOException {

	// decode the timers: first their number and then the content itself.
	int noOfTimers = reader.getInt();
	Set<TimerInternals.TimerData> timers = new HashSet<>(noOfTimers);
	for (int i = 0; i < noOfTimers; i++) {
		String stringKey = reader.getTagToString();
		Instant instant = reader.getTimestamp();
		TimeDomain domain = TimeDomain.values()[reader.getInt()];

		StateNamespace namespace = StateNamespaces.fromString(stringKey, windowCoder);
		timers.add(TimerInternals.TimerData.of(namespace, instant, domain));
	}
	return timers;
}
 
Example #14
Source File: FlinkStateInternals.java    From flink-dataflow with Apache License 2.0
@Override
public void persistState(StateCheckpointWriter checkpointBuilder) throws IOException {
	if (value != null) {
		// serialize the coder.
		byte[] coder = InstantiationUtil.serializeObject(elemCoder);

		// encode the value into a ByteString
		ByteString.Output stream = ByteString.newOutput();
		elemCoder.encode(value, stream, Coder.Context.OUTER);
		ByteString data = stream.toByteString();

		checkpointBuilder.addValueBuilder()
			.setTag(stateKey)
			.setData(coder)
			.setData(data);
	}
}
 
Example #15
Source File: FlinkCreateFunction.java    From flink-dataflow with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public void flatMap(IN value, Collector<OUT> out) throws Exception {

	for (byte[] element : elements) {
		ByteArrayInputStream bai = new ByteArrayInputStream(element);
		OUT outValue = coder.decode(bai, Coder.Context.OUTER);
		if (outValue == null) {
			// TODO Flink doesn't allow null values in records
			out.collect((OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE);
		} else {
			out.collect(outValue);
		}
	}

	out.close();
}
 
Example #16
Source File: FlinkStateInternals.java    From flink-dataflow with Apache License 2.0
private FlinkInMemoryKeyedCombiningValue(ByteString stateKey,
                                         CombineWithContext.KeyedCombineFnWithContext<? super K, InputT, AccumT, OutputT> combineFn,
                                         Coder<AccumT> accumCoder,
                                         final StateContext<?> stateContext) {
	Preconditions.checkNotNull(combineFn);
	Preconditions.checkNotNull(accumCoder);

	this.stateKey = stateKey;
	this.combineFn = combineFn;
	this.accumCoder = accumCoder;
	this.context = new CombineWithContext.Context() {
		@Override
		public PipelineOptions getPipelineOptions() {
			return stateContext.getPipelineOptions();
		}

		@Override
		public <T> T sideInput(PCollectionView<T> view) {
			return stateContext.sideInput(view);
		}
	};
	accum = combineFn.createAccumulator(key, context);
}
 
Example #17
Source File: FlinkStateInternals.java    From flink-dataflow with Apache License 2.0
@Override
public void persistState(StateCheckpointWriter checkpointBuilder) throws IOException {
	if (!isClear) {
		// serialize the coder.
		byte[] coder = InstantiationUtil.serializeObject(accumCoder);

		// serialize the combiner.
		byte[] combiner = InstantiationUtil.serializeObject(combineFn);

		// encode the accumulator into a ByteString
		ByteString.Output stream = ByteString.newOutput();
		accumCoder.encode(accum, stream, Coder.Context.OUTER);
		ByteString data = stream.toByteString();

		// put the flag that the next serialized element is an accumulator
		checkpointBuilder.addAccumulatorBuilder()
			.setTag(stateKey)
			.setData(coder)
			.setData(combiner)
			.setData(data);
	}
}
 
Example #18
Source File: FlinkStateInternals.java    From flink-dataflow with Apache License 2.0
@Override
public void persistState(StateCheckpointWriter checkpointBuilder) throws IOException {
	if (!contents.isEmpty()) {
		// serialize the coder.
		byte[] coder = InstantiationUtil.serializeObject(elemCoder);

		checkpointBuilder.addListUpdatesBuilder()
				.setTag(stateKey)
				.setData(coder)
				.writeInt(contents.size());

		for (T item : contents) {
			// encode the element
			ByteString.Output stream = ByteString.newOutput();
			elemCoder.encode(item, stream, Coder.Context.OUTER);
			ByteString data = stream.toByteString();

			// add the data to the checkpoint.
			checkpointBuilder.setData(data);
		}
	}
}
 
Example #19
Source File: UnionCoder.java    From flink-dataflow with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void encode(
		RawUnionValue union,
		OutputStream outStream,
		Context context)
		throws IOException  {
	int index = getIndexForEncoding(union);
	// Write out the union tag.
	VarInt.encode(index, outStream);

	// Write out the actual value.
	Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
	coder.encode(
			union.getValue(),
			outStream,
			context);
}
 
Example #20
Source File: UnboundedSocketSource.java    From flink-dataflow with Apache License 2.0
@Nullable
@Override
public Coder getCheckpointMarkCoder() {
	// Flink and Dataflow have different checkpointing mechanisms.
	// In our case we do not need a coder.
	return null;
}
 
Example #21
Source File: FlinkDoFnFunction.java    From flink-dataflow with Apache License 2.0
@Override
public WindowingInternals<IN, OUT> windowingInternals() {
	return new WindowingInternals<IN, OUT>() {
		@Override
		public StateInternals stateInternals() {
			return null;
		}

		@Override
		public void outputWindowedValue(OUT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {

		}

		@Override
		public TimerInternals timerInternals() {
			return null;
		}

		@Override
		public Collection<? extends BoundedWindow> windows() {
			return ImmutableList.of(GlobalWindow.INSTANCE);
		}

		@Override
		public PaneInfo pane() {
			return PaneInfo.NO_FIRING;
		}

		@Override
		public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
		}

		@Override
		public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
			throw new RuntimeException("sideInput() not implemented.");
		}
	};
}
 
Example #22
Source File: FlinkStreamingTransformTranslators.java    From flink-dataflow with Apache License 2.0
@Override
public void translateNode(Create.Values<OUT> transform, FlinkStreamingTranslationContext context) {
	PCollection<OUT> output = context.getOutput(transform);
	Iterable<OUT> elements = transform.getElements();

	// we need to serialize the elements to byte arrays, since they might contain
	// elements that are not serializable by Java serialization. We deserialize them
	// in the FlatMap function using the Coder.

	List<byte[]> serializedElements = Lists.newArrayList();
	Coder<OUT> elementCoder = context.getOutput(transform).getCoder();
	for (OUT element: elements) {
		ByteArrayOutputStream bao = new ByteArrayOutputStream();
		try {
			elementCoder.encode(element, bao, Coder.Context.OUTER);
			serializedElements.add(bao.toByteArray());
		} catch (IOException e) {
			throw new RuntimeException("Could not serialize Create elements using Coder: " + e);
		}
	}


	DataStream<Integer> initDataSet = context.getExecutionEnvironment().fromElements(1);

	FlinkStreamingCreateFunction<Integer, OUT> createFunction =
			new FlinkStreamingCreateFunction<>(serializedElements, elementCoder);

	WindowedValue.ValueOnlyWindowedValueCoder<OUT> windowCoder = WindowedValue.getValueOnlyCoder(elementCoder);
	TypeInformation<WindowedValue<OUT>> outputType = new CoderTypeInformation<>(windowCoder);

	DataStream<WindowedValue<OUT>> outputDataStream = initDataSet.flatMap(createFunction)
			.returns(outputType);

	context.setOutputDataStream(context.getOutput(transform), outputDataStream);
}
 
Example #23
Source File: UnionCoder.java    From flink-dataflow with Apache License 2.0
/**
 * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder.
 */
@Override
public void registerByteSizeObserver(
		RawUnionValue union, ElementByteSizeObserver observer, Context context)
		throws Exception {
	int index = getIndexForEncoding(union);
	// Write out the union tag.
	observer.update(VarInt.getLength(index));
	// Write out the actual value.
	@SuppressWarnings("unchecked")
	Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
	coder.registerByteSizeObserver(union.getValue(), observer, context);
}
 
Example #24
Source File: UnionCoder.java    From flink-dataflow with Apache License 2.0
/**
 * Since this coder uses elementCoders.get(index) and coders that are known to run in constant
 * time, we defer the return value to that coder.
 */
@Override
public boolean isRegisterByteSizeObserverCheap(RawUnionValue union, Context context) {
	int index = getIndexForEncoding(union);
	@SuppressWarnings("unchecked")
	Coder<Object> coder = (Coder<Object>) elementCoders.get(index);
	return coder.isRegisterByteSizeObserverCheap(union.getValue(), context);
}
 
Example #25
Source File: CompleteTimeSeriesAggCombiner.java    From data-timeseries-java with Apache License 2.0
@Override
public Accum decode(InputStream inStream,
    com.google.cloud.dataflow.sdk.coders.Coder.Context context) throws CoderException,
    IOException {
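  // Each field is decoded with a nested context so the two encodings can
  // sit back-to-back in a single stream.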
  Accum accum = new Accum();
  accum.lastCandle = TSPROTO_CODER.decode(inStream, context.nested());
  accum.candles = LIST_CODER.decode(inStream, context.nested());
  return accum;
}
 
Example #26
Source File: CompleteTimeSeriesAggCombiner.java    From data-timeseries-java with Apache License 2.0
@Override
public void encode(Accum value, OutputStream outStream,
    com.google.cloud.dataflow.sdk.coders.Coder.Context context) throws CoderException,
    IOException {

  TSPROTO_CODER.encode(value.lastCandle, outStream, context.nested());
  LIST_CODER.encode(value.candles, outStream, context.nested());

}
 
Example #27
Source File: FlinkStateInternals.java    From flink-dataflow with Apache License 2.0
public void restoreState(StateCheckpointReader checkpointReader) throws IOException {
	int noOfValues = checkpointReader.getInt();
	for (int j = 0; j < noOfValues; j++) {
		ByteString valueContent = checkpointReader.getData();
		T outValue = elemCoder.decode(new ByteArrayInputStream(valueContent.toByteArray()), Coder.Context.OUTER);
		add(outValue);
	}
}
 
Example #28
Source File: FlinkParDoBoundWrapper.java    From flink-dataflow with Apache License 2.0
@Override
public WindowingInternals<IN, OUT> windowingInternalsHelper(final WindowedValue<IN> inElement, final Collector<WindowedValue<OUT>> collector) {
	return new WindowingInternals<IN, OUT>() {
		@Override
		public StateInternals stateInternals() {
			throw new NullPointerException("StateInternals are not available for ParDo.Bound().");
		}

		@Override
		public void outputWindowedValue(OUT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
			collector.collect(makeWindowedValue(output, timestamp, windows, pane));
		}

		@Override
		public TimerInternals timerInternals() {
			throw new NullPointerException("TimeInternals are not available for ParDo.Bound().");
		}

		@Override
		public Collection<? extends BoundedWindow> windows() {
			return inElement.getWindows();
		}

		@Override
		public PaneInfo pane() {
			return inElement.getPane();
		}

		@Override
		public <T> void writePCollectionViewData(TupleTag<?> tag, Iterable<WindowedValue<T>> data, Coder<T> elemCoder) throws IOException {
			throw new RuntimeException("writePCollectionViewData() not supported in Streaming mode.");
		}

		@Override
		public <T> T sideInput(PCollectionView<T> view, BoundedWindow mainInputWindow) {
			throw new RuntimeException("sideInput() not implemented.");
		}
	};
}
 
Example #29
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0
@Override
public void translateNode(Read.Bounded<T> transform, FlinkBatchTranslationContext context) {
	String name = transform.getName();
	BoundedSource<T> source = transform.getSource();
	PCollection<T> output = context.getOutput(transform);
	Coder<T> coder = output.getCoder();

	TypeInformation<T> typeInformation = context.getTypeInfo(output);

	DataSource<T> dataSource = new DataSource<>(context.getExecutionEnvironment(),
			new SourceInputFormat<>(source, context.getPipelineOptions()), typeInformation, name);

	context.setOutputDataSet(output, dataSource);
}
 
Example #30
Source File: FlinkBatchTranslationContext.java    From flink-dataflow with Apache License 2.0
@SuppressWarnings("unchecked")
public <T> TypeInformation<T> getTypeInfo(PInput output) {
	if (output instanceof TypedPValue) {
		Coder<?> outputCoder = ((TypedPValue) output).getCoder();
		if (outputCoder instanceof KvCoder) {
			return new KvCoderTypeInformation((KvCoder) outputCoder);
		} else {
			return new CoderTypeInformation(outputCoder);
		}
	}
	return new GenericTypeInfo<>((Class<T>)Object.class);
}