Java Code Examples for org.apache.flink.api.java.DataSet#reduceGroup()

The following examples show how to use org.apache.flink.api.java.DataSet#reduceGroup(). You can go to the original project or source file by following the links above each example.
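Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the method's core behavior: called on a non-grouped DataSet, reduceGroup() hands the entire input to a single invocation of the supplied GroupReduceFunction.

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.util.Collector;

public class ReduceGroupSketch {
	public static void main(String[] args) throws Exception {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);

		// without a preceding groupBy(), all elements arrive in one reduce call
		DataSet<Integer> sum = numbers.reduceGroup(
			new GroupReduceFunction<Integer, Integer>() {
				@Override
				public void reduce(Iterable<Integer> values, Collector<Integer> out) {
					int total = 0;
					for (int v : values) {
						total += v;
					}
					out.collect(total);
				}
			});

		sum.print(); // 15
	}
}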
Example 1
Source File: GroupReduceITCase.java    From flink with Apache License 2.0
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
	/*
	 * check correctness of all-groupreduce for custom types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<CustomType> reduceDs = ds.reduceGroup(new AllAddingCustomTypeGroupReduce());

	List<CustomType> result = reduceDs.collect();

	String expected = "91,210,Hello!";

	compareResultAsText(result, expected);
}
 
Example 2
Source File: ReduceWithCombinerITCase.java    From flink with Apache License 2.0
@Test
public void testForkingReduceOnNonKeyedDataset() throws Exception {

	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// creates the input data and distributes it evenly among the available downstream tasks
	DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env);

	DataSet<Tuple2<Integer, Boolean>> r1 = input.reduceGroup(new NonKeyedCombReducer());
	DataSet<Tuple2<Integer, Boolean>> r2 = input.reduceGroup(new NonKeyedGroupCombReducer());

	List<Tuple2<Integer, Boolean>> actual = r1.union(r2).collect();
	String expected = "10,true\n10,true\n";
	compareResultAsTuples(actual, expected);
}
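NonKeyedCombReducer and NonKeyedGroupCombReducer are helpers defined elsewhere in the test class and are not reproduced on this page. As a hypothetical sketch of the pattern they exercise: a group reduce becomes combinable when the function also implements GroupCombineFunction, which lets Flink pre-aggregate each parallel partition before the final single-task reduce. The actual test helpers may differ in detail.

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical combinable reducer: sums the integer field and ANDs the boolean flag.
public class SumWithCombiner implements
		GroupReduceFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>,
		GroupCombineFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>> {

	@Override
	public void combine(Iterable<Tuple2<Integer, Boolean>> values,
			Collector<Tuple2<Integer, Boolean>> out) {
		reduce(values, out); // combine and reduce share the same aggregation logic
	}

	@Override
	public void reduce(Iterable<Tuple2<Integer, Boolean>> values,
			Collector<Tuple2<Integer, Boolean>> out) {
		int sum = 0;
		boolean conjunction = true;
		for (Tuple2<Integer, Boolean> value : values) {
			sum += value.f0;
			conjunction &= value.f1;
		}
		out.collect(new Tuple2<>(sum, conjunction));
	}
}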
 
Example 3
Source File: GroupReduceITCase.java    From flink with Apache License 2.0
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
	/*
	 * check correctness of all-groupreduce for tuples
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.reduceGroup(new AllAddingTuple3GroupReduce());

	List<Tuple3<Integer, Long, String>> result = reduceDs.collect();

	String expected = "231,91,Hello World\n";

	compareResultAsTuples(result, expected);
}
 
Example 4
Source File: HadoopReduceCombineFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper2());

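	// the first SumReducer is the Hadoop reducer, the second one serves as the combiner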
	DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
			reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new SumReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	sum.writeAsText(resultPath);
	env.execute();

	String expected = "(0,231)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 5
Source File: QuantileDiscretizerTrainBatchOp.java    From Alink with Apache License 2.0
public static DataSet<FeatureBorder> transformModelToFeatureBorder(DataSet<Row> modelDataSet) {
	return modelDataSet
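		// an ungrouped reduceGroup gathers all model rows into a single reduce call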
		.reduceGroup(
			new GroupReduceFunction<Row, FeatureBorder>() {
				@Override
				public void reduce(Iterable<Row> values, Collector<FeatureBorder> out) throws Exception {
					List<Row> list = new ArrayList<>();
					values.forEach(list::add);
					QuantileDiscretizerModelDataConverter model
						= new QuantileDiscretizerModelDataConverter().load(list);
					for (Map.Entry<String, FeatureBorder> entry : model.data.entrySet()) {
						out.collect(entry.getValue());
					}
				}
			}
		);
}
 
Example 6
Source File: HadoopReduceFunctionITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);

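	// wrap the Hadoop Reducer so it runs as a Flink GroupReduceFunction over the whole DataSet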
	DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
			reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	commentCnts.writeAsText(resultPath);
	env.execute();

	String expected = "(42,15)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 7
Source File: QuantileDiscretizerTrainBatchOp.java    From Alink with Apache License 2.0
@Override
public QuantileDiscretizerTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
	BatchOperator<?> in = checkAndGetFirst(inputs);
	if (getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS) && getParams().contains(
		QuantileDiscretizerTrainParams.NUM_BUCKETS_ARRAY)) {
		throw new RuntimeException("It can not set num_buckets and num_buckets_array at the same time.");
	}

	String[] quantileColNames = getSelectedCols();

	int[] quantileNum = null;

	if (getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS)) {
		quantileNum = new int[quantileColNames.length];
		Arrays.fill(quantileNum, getNumBuckets());
	} else {
		quantileNum = Arrays.stream(getNumBucketsArray()).mapToInt(Integer::intValue).toArray();
	}

	/* filter the selected columns from the input */
	DataSet<Row> input = Preprocessing.select(in, quantileColNames).getDataSet();

	DataSet<Row> quantile = quantile(
		input, quantileNum,
		getParams().get(HasRoundMode.ROUND_MODE),
		getParams().get(Preprocessing.ZERO_AS_MISSING)
	);

	quantile = quantile.reduceGroup(
		new SerializeModel(
			getParams(),
			quantileColNames,
			TableUtil.findColTypesWithAssertAndHint(in.getSchema(), quantileColNames),
			BinTypes.BinDivideType.QUANTILE
		)
	);

	/* set output */
	setOutput(quantile, new QuantileDiscretizerModelDataConverter().getModelSchema());

	return this;
}
 