org.apache.flink.api.java.operators.Grouping Java Examples

The following examples show how to use org.apache.flink.api.java.operators.Grouping. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ScalaAggregateOperator.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a grouped aggregation that applies one aggregation function to one tuple field.
 *
 * @param input the grouping to aggregate; its input data set must have a tuple type
 * @param function the aggregation whose factory supplies the aggregation function
 * @param field position of the tuple field to aggregate, from 0 up to (excluding) the tuple arity
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());

	Preconditions.checkNotNull(function);

	// Positional aggregation is only defined for tuple records.
	final boolean tupleInput = input.getInputDataSet().getType().isTupleType();
	if (!tupleInput) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	final TupleTypeInfoBase<?> tupleType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	final boolean fieldInRange = field >= 0 && field < tupleType.getArity();
	if (!fieldInRange) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	// The factory creates the aggregation function from the class of the selected field.
	final AggregationFunction<?> aggregator =
			function.getFactory().createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

	// register the grouping together with the single (function, field) pair
	this.grouping = input;
	this.fields.add(field);
	this.aggregationFunctions.add(aggregator);
}
 
Example #2
Source File: ScalaAggregateOperator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Grouped aggregation over a single field of a tuple data set.
 *
 * @param input the grouping that provides the input data set; must not be null
 * @param function the aggregation to perform; must not be null
 * @param field index of the tuple field the aggregation is applied to
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());

	Preconditions.checkNotNull(function);

	if (!input.getInputDataSet().getType().isTupleType()) {
		throw new InvalidProgramException(
				"Aggregating on field positions is only possible on tuple data types.");
	}

	// Safe after the tuple check above.
	TupleTypeInfoBase<?> tupleInfo = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	if (field < 0 || tupleInfo.getArity() <= field) {
		throw new IllegalArgumentException(
				"Aggregation field position is out of range.");
	}

	AggregationFunctionFactory functionFactory = function.getFactory();
	AggregationFunction<?> fieldAggregation =
			functionFactory.createAggregationFunction(tupleInfo.getTypeAt(field).getTypeClass());

	// remember what to aggregate, on which field, and over which grouping
	this.aggregationFunctions.add(fieldAggregation);
	this.fields.add(field);
	this.grouping = input;
}
 
Example #3
Source File: ScalaAggregateOperator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an aggregate operator for a grouped data set.
 *
 * <p>The aggregation is applied to exactly one field, addressed by its position in the tuple.
 *
 * @param input the grouped input; its data set type must be a tuple type
 * @param function the kind of aggregation to apply to the field
 * @param field the position of the aggregated field within the tuple
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());

	Preconditions.checkNotNull(function);

	final boolean isTuple = input.getInputDataSet().getType().isTupleType();
	if (isTuple) {
		final TupleTypeInfoBase<?> tupleTypeInfo = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

		final boolean outOfRange = field < 0 || field >= tupleTypeInfo.getArity();
		if (outOfRange) {
			throw new IllegalArgumentException("Aggregation field position is out of range.");
		}

		final AggregationFunctionFactory aggFactory = function.getFactory();
		final AggregationFunction<?> aggFunction =
				aggFactory.createAggregationFunction(tupleTypeInfo.getTypeAt(field).getTypeClass());

		// record the aggregation function, the field position and the grouping
		this.aggregationFunctions.add(aggFunction);
		this.fields.add(field);
		this.grouping = input;
	} else {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}
}
 
Example #4
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GroupByKey.GroupByKeyOnly} transform into a Flink group-reduce.
 *
 * <p>The input data set is grouped on its {@code "key"} field expression and reduced with a
 * {@code FlinkKeyedListAggregationFunction}; the resulting operator is registered with the
 * context as the data set of the transform's output.
 *
 * @param transform the transform being translated
 * @param context the translation context that supplies the input data set and output type
 */
@Override
public void translateNode(GroupByKey.GroupByKeyOnly<K, V> transform, FlinkBatchTranslationContext context) {
	final DataSet<KV<K, V>> input = context.getInputDataSet(context.getInput(transform));
	final GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> listAggregator = new FlinkKeyedListAggregationFunction<>();
	final TypeInformation<KV<K, Iterable<V>>> outputType = context.getTypeInfo(context.getOutput(transform));

	// Group the records on the "key" field expression of the input type.
	final Keys.ExpressionKeys<KV<K, V>> keyFields =
			new Keys.ExpressionKeys<>(new String[]{"key"}, input.getType());
	final Grouping<KV<K, V>> byKey = new UnsortedGrouping<>(input, keyFields);

	final GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> reduced =
			new GroupReduceOperator<>(byKey, outputType, listAggregator, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), reduced);
}
 
Example #5
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code GroupByKey} transform into a Flink {@code GroupReduceOperator}.
 *
 * <p>Records are grouped on the {@code "key"} field expression and handed to a
 * {@code FlinkKeyedListAggregationFunction}. The operator is named after the transform and
 * stored in the context as the transform's output data set.
 *
 * @param transform the transform being translated
 * @param context the batch translation context
 */
@Override
public void translateNode(GroupByKey<K, V> transform, FlinkBatchTranslationContext context) {
	DataSet<KV<K, V>> source = context.getInputDataSet(context.getInput(transform));

	GroupReduceFunction<KV<K, V>, KV<K, Iterable<V>>> collectPerKey = new FlinkKeyedListAggregationFunction<>();
	TypeInformation<KV<K, Iterable<V>>> resultType = context.getTypeInfo(context.getOutput(transform));

	// key the data set by the "key" field of the KV type
	Grouping<KV<K, V>> keyed =
			new UnsortedGrouping<>(
					source,
					new Keys.ExpressionKeys<>(new String[]{"key"}, source.getType()));

	GroupReduceOperator<KV<K, V>, KV<K, Iterable<V>>> grouped =
			new GroupReduceOperator<>(keyed, resultType, collectPerKey, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), grouped);
}
 
Example #6
Source File: FlinkBatchTransformTranslators.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a group-by-key style transform into a two-stage Flink plan.
 *
 * <p>The input is grouped by key and partially reduced with a {@code Concatenate} combine
 * function inside a {@code GroupCombineOperator}; the intermediate result is grouped by key again
 * and fully reduced by a {@code GroupReduceOperator}, which becomes the output data set of the
 * transform.
 *
 * @param transform the transform being translated
 * @param context the batch translation context
 * @throws RuntimeException if no accumulator coder can be provided for the combine function
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkBatchTranslationContext context) {

  // This mirrors the Combine.PerKey translator for now. With the new runner API, GroupByKey
  // can be replaced by a Combine.PerKey that uses the Concatenate CombineFn.

  final DataSet<WindowedValue<KV<K, InputT>>> source =
      context.getInputDataSet(context.getInput(transform));

  final Combine.CombineFn<InputT, List<InputT>, List<InputT>> concatenate = new Concatenate<>();

  final KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) context.getInput(transform).getCoder();

  final Coder<List<InputT>> listCoder;
  try {
    listCoder =
        concatenate.getAccumulatorCoder(
            context.getInput(transform).getPipeline().getCoderRegistry(),
            kvCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  final WindowingStrategy<?, ?> strategy = context.getInput(transform).getWindowingStrategy();

  // Both the combine stage and the reduce stage produce this windowed KV<K, List<InputT>> type.
  final TypeInformation<WindowedValue<KV<K, List<InputT>>>> accumulatedTypeInfo =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(
              KvCoder.of(kvCoder.getKeyCoder(), listCoder),
              strategy.getWindowFn().windowCoder()));

  final Grouping<WindowedValue<KV<K, InputT>>> groupedInput =
      source.groupBy(new KvKeySelector<>(kvCoder.getKeyCoder()));

  @SuppressWarnings("unchecked")
  final WindowingStrategy<Object, BoundedWindow> typedStrategy =
      (WindowingStrategy<Object, BoundedWindow>) strategy;

  final FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> combineStage =
      new FlinkPartialReduceFunction<>(
          concatenate, typedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  final FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceStage =
      new FlinkReduceFunction<>(
          concatenate, typedStrategy, Collections.emptyMap(), context.getPipelineOptions());

  // Stage 1: combine the values per key into the intermediate accumulator format.
  final String transformName = getCurrentTransformName(context);
  final GroupCombineOperator<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, List<InputT>>>>
      combined =
          new GroupCombineOperator<>(
              groupedInput, accumulatedTypeInfo, combineStage, "GroupCombine: " + transformName);

  final Grouping<WindowedValue<KV<K, List<InputT>>>> groupedCombined =
      combined.groupBy(new KvKeySelector<>(kvCoder.getKeyCoder()));

  // Stage 2: reduce the accumulators per key into the final output.
  final GroupReduceOperator<
          WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>>
      reduced =
          new GroupReduceOperator<>(
              groupedCombined, accumulatedTypeInfo, reduceStage, transformName);

  context.setOutputDataSet(context.getOutput(transform), reduced);
}
 
Example #7
Source File: FlinkBatchTransformTranslators.java    From flink-dataflow with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a {@code Combine.PerKey} transform into a Flink combine followed by a reduce.
 *
 * <p>The input is grouped on the {@code "key"} field expression and partially reduced into the
 * accumulator type {@code VA} by a {@code GroupCombineOperator}. The intermediate result is
 * grouped on {@code "key"} again and fully reduced to {@code VO} by a
 * {@code GroupReduceOperator}, which is registered as the output data set of the transform.
 *
 * @param transform the transform being translated
 * @param context the batch translation context
 * @throws RuntimeException if the combine function cannot provide an accumulator coder
 */
@Override
public void translateNode(Combine.PerKey<K, VI, VO> transform, FlinkBatchTranslationContext context) {
	DataSet<KV<K, VI>> inputDataSet = context.getInputDataSet(context.getInput(transform));

	@SuppressWarnings("unchecked")
	Combine.KeyedCombineFn<K, VI, VA, VO> keyedCombineFn = (Combine.KeyedCombineFn<K, VI, VA, VO>) transform.getFn();

	KvCoder<K, VI> inputCoder = (KvCoder<K, VI>) context.getInput(transform).getCoder();

	// Fail fast: without an accumulator coder the intermediate type information below would be
	// built around a null coder and the translation would only break later, far from the cause.
	final Coder<VA> accumulatorCoder;
	try {
		accumulatorCoder = keyedCombineFn.getAccumulatorCoder(context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder());
	} catch (CannotProvideCoderException e) {
		throw new RuntimeException("Could not provide an accumulator coder for " + transform.getName(), e);
	}

	TypeInformation<KV<K, VI>> kvCoderTypeInformation = new KvCoderTypeInformation<>(inputCoder);
	TypeInformation<KV<K, VA>> partialReduceTypeInfo = new KvCoderTypeInformation<>(KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder));

	Grouping<KV<K, VI>> inputGrouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation));

	FlinkPartialReduceFunction<K, VI, VA> partialReduceFunction = new FlinkPartialReduceFunction<>(keyedCombineFn);

	// Partially GroupReduce the values into the intermediate format VA (combine)
	GroupCombineOperator<KV<K, VI>, KV<K, VA>> groupCombine =
			new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction,
					"GroupCombine: " + transform.getName());

	// Reduce fully to VO
	GroupReduceFunction<KV<K, VA>, KV<K, VO>> reduceFunction = new FlinkReduceFunction<>(keyedCombineFn);

	TypeInformation<KV<K, VO>> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform));

	// The combine output must be grouped by key again before the final reduce.
	Grouping<KV<K, VA>> intermediateGrouping = new UnsortedGrouping<>(groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType()));

	// Fully reduce the values and create output format VO
	GroupReduceOperator<KV<K, VA>, KV<K, VO>> outputDataSet =
			new GroupReduceOperator<>(intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}