Java Code Examples for com.google.cloud.dataflow.sdk.transforms.Combine#KeyedCombineFn

The following examples show how to use com.google.cloud.dataflow.sdk.transforms.Combine#KeyedCombineFn. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FlinkGroupAlsoByWindowWrapper.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code DataStream} whose elements are grouped in windows based on the windowing
 * strategy of the given input.
 * This method assumes that <b>elements are already grouped by key</b>.
 * <p>
 * Unlike {@link #createForIterable(PipelineOptions, PCollection, KeyedStream)}, this method
 * expects a combiner function to be provided
 * (see {@link com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn}).
 * A combiner helps increase speed and, in most cases, reduces the per-window state.
 *
 * @param options            the general job configuration options; must not be {@code null}.
 * @param input              the input Dataflow {@link com.google.cloud.dataflow.sdk.values.PCollection};
 *                           its coder, pipeline coder registry and windowing strategy are used.
 * @param groupedStreamByKey the input stream, it is assumed to already be grouped by key.
 * @param combiner           the combiner to be used.
 * @param outputKvCoder      the coder for the output key/value pairs; it is wrapped together with
 *                           the window coder to describe the windowed output elements.
 * @return the stream produced by applying the "GroupByWindowWithCombiner" operator to
 *         {@code groupedStreamByKey}.
 */
public static <K, VIN, VACC, VOUT> DataStream<WindowedValue<KV<K, VOUT>>> create(
		PipelineOptions options,
		PCollection input,
		KeyedStream<WindowedValue<KV<K, VIN>>, K> groupedStreamByKey,
		Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner,
		KvCoder<K, VOUT> outputKvCoder) {
	Preconditions.checkNotNull(options);

	// The operator is configured from the (raw) input collection: its coder,
	// its pipeline's coder registry and its windowing strategy.
	KvCoder<K, VIN> kvInCoder = (KvCoder<K, VIN>) input.getCoder();
	FlinkGroupAlsoByWindowWrapper groupAlsoByWindow = new FlinkGroupAlsoByWindowWrapper<>(
			options,
			input.getPipeline().getCoderRegistry(),
			input.getWindowingStrategy(),
			kvInCoder,
			combiner);

	// Output elements are KVs wrapped in a WindowedValue, so combine the KV coder
	// with the coder of the window function's windows.
	Coder<WindowedValue<KV<K, VOUT>>> windowedKvOutCoder =
			WindowedValue.FullWindowedValueCoder.of(
					outputKvCoder,
					input.getWindowingStrategy().getWindowFn().windowCoder());
	CoderTypeInformation<WindowedValue<KV<K, VOUT>>> windowedKvOutTypeInfo =
			new CoderTypeInformation<>(windowedKvOutCoder);

	// The type information handed to transform() is replaced by returns(...) right after.
	return groupedStreamByKey
			.transform(
					"GroupByWindowWithCombiner",
					new CoderTypeInformation<>(outputKvCoder),
					groupAlsoByWindow)
			.returns(windowedKvOutTypeInfo);
}
 
Example 2
Source File: FlinkGroupAlsoByWindowWrapper.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Factory that builds a wrapper directly from its constituent parts instead of deriving
 * them from an input collection; named for use in tests.
 *
 * @param options           the general job configuration options; must not be {@code null}.
 * @param registry          the coder registry handed to the wrapper.
 * @param windowingStrategy the windowing strategy handed to the wrapper.
 * @param inputCoder        the coder of the input key/value pairs.
 * @param combiner          the combiner handed to the wrapper.
 * @return a new wrapper instance built from the given arguments.
 */
public static <K, VIN, VACC, VOUT> FlinkGroupAlsoByWindowWrapper createForTesting(
		PipelineOptions options,
		CoderRegistry registry,
		WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
		KvCoder<K, VIN> inputCoder,
		Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
	Preconditions.checkNotNull(options);

	FlinkGroupAlsoByWindowWrapper wrapper =
			new FlinkGroupAlsoByWindowWrapper(options, registry, windowingStrategy, inputCoder, combiner);
	return wrapper;
}
 
Example 3
Source File: FlinkGroupAlsoByWindowWrapper.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the configuration of this wrapper and creates the underlying operator.
 *
 * @param options           the general job configuration options; must not be {@code null}.
 * @param registry          the coder registry; must not be {@code null}.
 * @param windowingStrategy the windowing strategy; must not be {@code null}.
 * @param inputCoder        the coder of the input key/value pairs; must not be {@code null}.
 * @param combiner          the combiner; stored as given, without a null check.
 */
private FlinkGroupAlsoByWindowWrapper(PipelineOptions options,
                                      CoderRegistry registry,
                                      WindowingStrategy<KV<K, VIN>, BoundedWindow> windowingStrategy,
                                      KvCoder<K, VIN> inputCoder,
                                      Combine.KeyedCombineFn<K, VIN, VACC, VOUT> combiner) {
	// checkNotNull returns its argument, so each line validates and assigns in one step;
	// a separate up-front check of options would be redundant.
	this.options = Preconditions.checkNotNull(options);
	this.coderRegistry = Preconditions.checkNotNull(registry);
	this.inputKvCoder = Preconditions.checkNotNull(inputCoder);
	this.windowingStrategy = Preconditions.checkNotNull(windowingStrategy);
	// Unlike the arguments above, the combiner is not null-checked here.
	this.combineFn = combiner;
	// NOTE(review): presumably reads the fields assigned above, so keep this call after them.
	this.operator = createGroupAlsoByWindowOperator();
	this.chainingStrategy = ChainingStrategy.ALWAYS;
}
 
Example 4
Source File: FlinkStateInternals.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor accepting a plain {@code Combine.KeyedCombineFn}: the function is
 * passed through {@code withContext(...)} and construction is delegated to the other
 * constructor of this class.
 *
 * @param stateKey     the key identifying this state.
 * @param combineFn    the keyed combine function to adapt via {@code withContext}.
 * @param accumCoder   the coder for the accumulator type.
 * @param stateContext the state context, forwarded unchanged.
 */
private FlinkInMemoryKeyedCombiningValue(
		ByteString stateKey,
		Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn,
		Coder<AccumT> accumCoder,
		final StateContext<?> stateContext) {
	this(
			stateKey,
			withContext(combineFn),
			accumCoder,
			stateContext);
}
 
Example 5
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a {@code Combine.PerKey} transform into two chained Flink operators: a
 * {@code GroupCombineOperator} that partially reduces input values into accumulators,
 * followed by a {@code GroupReduceOperator} that reduces the accumulators to the output
 * values. Both stages group on the {@code "key"} field. The resulting data set is
 * registered in the context as the output of the transform.
 *
 * @param transform the per-key combine transform to translate.
 * @param context   the batch translation context providing the input data set and type
 *                  information, and receiving the output data set.
 * @throws RuntimeException if the combine function cannot provide an accumulator coder;
 *                          the original {@code CannotProvideCoderException} is the cause.
 */
@Override
public void translateNode(Combine.PerKey<K, VI, VO> transform, FlinkBatchTranslationContext context) {
	DataSet<KV<K, VI>> inputDataSet = context.getInputDataSet(context.getInput(transform));

	@SuppressWarnings("unchecked")
	Combine.KeyedCombineFn<K, VI, VA, VO> keyedCombineFn = (Combine.KeyedCombineFn<K, VI, VA, VO>) transform.getFn();

	KvCoder<K, VI> inputCoder = (KvCoder<K, VI>) context.getInput(transform).getCoder();

	// Fail fast when no accumulator coder can be derived: continuing with a null coder
	// would only surface later as an opaque failure far from the actual cause.
	final Coder<VA> accumulatorCoder;
	try {
		accumulatorCoder = keyedCombineFn.getAccumulatorCoder(
				context.getInput(transform).getPipeline().getCoderRegistry(),
				inputCoder.getKeyCoder(),
				inputCoder.getValueCoder());
	} catch (CannotProvideCoderException e) {
		throw new RuntimeException(
				"Could not determine accumulator coder for Combine transform: " + transform.getName(), e);
	}

	TypeInformation<KV<K, VI>> kvCoderTypeInformation = new KvCoderTypeInformation<>(inputCoder);
	TypeInformation<KV<K, VA>> partialReduceTypeInfo = new KvCoderTypeInformation<>(KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder));

	Grouping<KV<K, VI>> inputGrouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation));

	FlinkPartialReduceFunction<K, VI, VA> partialReduceFunction = new FlinkPartialReduceFunction<>(keyedCombineFn);

	// Partially GroupReduce the values into the intermediate format VA (combine)
	GroupCombineOperator<KV<K, VI>, KV<K, VA>> groupCombine =
			new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction,
					"GroupCombine: " + transform.getName());

	// Reduce fully to VO
	GroupReduceFunction<KV<K, VA>, KV<K, VO>> reduceFunction = new FlinkReduceFunction<>(keyedCombineFn);

	TypeInformation<KV<K, VO>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform));

	// Regroup the partially combined accumulators by key before the final reduce.
	Grouping<KV<K, VA>> intermediateGrouping = new UnsortedGrouping<>(groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType()));

	// Fully reduce the values and create output format VO
	GroupReduceOperator<KV<K, VA>, KV<K, VO>> outputDataSet =
			new GroupReduceOperator<>(intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
 
Example 6
Source File: FlinkPartialReduceFunction.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a partial reduce function backed by the given keyed combine function.
 *
 * @param keyedCombineFn the combine function to store; its output type is irrelevant here
 *                       and therefore left as a wildcard.
 */
public FlinkPartialReduceFunction(
		Combine.KeyedCombineFn<K, VI, VA, ?> keyedCombineFn) {
	this.keyedCombineFn = keyedCombineFn;
}
 
Example 7
Source File: FlinkReduceFunction.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a reduce function backed by the given keyed combine function.
 *
 * @param keyedCombineFn the combine function to store; its input type is irrelevant here
 *                       and therefore left as a wildcard.
 */
public FlinkReduceFunction(
		Combine.KeyedCombineFn<K, ?, VA, VO> keyedCombineFn) {
	this.keyedCombineFn = keyedCombineFn;
}