Java Code Examples for org.apache.flink.api.java.DataSet#reduceGroup()

The following examples show how to use org.apache.flink.api.java.DataSet#reduceGroup(). You can go to the original project or source file by following the links above each example.
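Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the method's core behavior: called on a non-grouped DataSet, reduceGroup() hands the entire input to a single invocation of the supplied GroupReduceFunction.

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.util.Collector;

public class ReduceGroupSketch {
	public static void main(String[] args) throws Exception {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5);

		// without a preceding groupBy(), all elements arrive in one reduce call
		DataSet<Integer> sum = numbers.reduceGroup(
			new GroupReduceFunction<Integer, Integer>() {
				@Override
				public void reduce(Iterable<Integer> values, Collector<Integer> out) {
					int total = 0;
					for (int v : values) {
						total += v;
					}
					out.collect(total);
				}
			});

		sum.print(); // 15
	}
}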
Example 1
Source File: GroupReduceITCase.java    From flink with Apache License 2.0
@Test
public void testCorrectnessOfAllGroupReduceForCustomTypes() throws Exception {
	/*
	 * check correctness of all-groupreduce for custom types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<CustomType> reduceDs = ds.reduceGroup(new AllAddingCustomTypeGroupReduce());

	List<CustomType> result = reduceDs.collect();

	String expected = "91,210,Hello!";

	compareResultAsText(result, expected);
}
 
Example 2
Source File: ReduceWithCombinerITCase.java    From flink with Apache License 2.0
@Test
public void testForkingReduceOnNonKeyedDataset() throws Exception {

	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// creates the input data and distributes it evenly among the available downstream tasks
	DataSet<Tuple2<Integer, Boolean>> input = createNonKeyedInput(env);

	DataSet<Tuple2<Integer, Boolean>> r1 = input.reduceGroup(new NonKeyedCombReducer());
	DataSet<Tuple2<Integer, Boolean>> r2 = input.reduceGroup(new NonKeyedGroupCombReducer());

	List<Tuple2<Integer, Boolean>> actual = r1.union(r2).collect();
	String expected = "10,true\n10,true\n";
	compareResultAsTuples(actual, expected);
}
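NonKeyedCombReducer and NonKeyedGroupCombReducer are helpers defined elsewhere in the test class and are not reproduced on this page. As a hypothetical sketch of the pattern they exercise: a group reduce becomes combinable when the function also implements GroupCombineFunction, which lets Flink pre-aggregate each parallel partition before the final single-task reduce. The actual test helpers may differ in detail.

import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical combinable reducer: sums the integer field and ANDs the boolean flag.
public class SumWithCombiner implements
		GroupReduceFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>>,
		GroupCombineFunction<Tuple2<Integer, Boolean>, Tuple2<Integer, Boolean>> {

	@Override
	public void combine(Iterable<Tuple2<Integer, Boolean>> values,
			Collector<Tuple2<Integer, Boolean>> out) {
		reduce(values, out); // combine and reduce share the same aggregation logic
	}

	@Override
	public void reduce(Iterable<Tuple2<Integer, Boolean>> values,
			Collector<Tuple2<Integer, Boolean>> out) {
		int sum = 0;
		boolean conjunction = true;
		for (Tuple2<Integer, Boolean> value : values) {
			sum += value.f0;
			conjunction &= value.f1;
		}
		out.collect(new Tuple2<>(sum, conjunction));
	}
}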
 
Example 3
Source File: GroupReduceITCase.java    From flink with Apache License 2.0
@Test
public void testCorrectnessOfAllGroupReduceForTuples() throws Exception {
	/*
	 * check correctness of all-groupreduce for tuples
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduceDs = ds.reduceGroup(new AllAddingTuple3GroupReduce());

	List<Tuple3<Integer, Long, String>> result = reduceDs.collect();

	String expected = "231,91,Hello World\n";

	compareResultAsTuples(result, expected);
}
 
Example 4
Source File: HadoopReduceCombineFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper2());

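	// the first SumReducer is the Hadoop reducer, the second one serves as the combiner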
	DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
			reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new SumReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	sum.writeAsText(resultPath);
	env.execute();

	String expected = "(0,231)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 5
Source File: QuantileDiscretizerTrainBatchOp.java    From Alink with Apache License 2.0
public static DataSet<FeatureBorder> transformModelToFeatureBorder(DataSet<Row> modelDataSet) {
	return modelDataSet
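		// an ungrouped reduceGroup gathers all model rows into a single reduce call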
		.reduceGroup(
			new GroupReduceFunction<Row, FeatureBorder>() {
				@Override
				public void reduce(Iterable<Row> values, Collector<FeatureBorder> out) throws Exception {
					List<Row> list = new ArrayList<>();
					values.forEach(list::add);
					QuantileDiscretizerModelDataConverter model
						= new QuantileDiscretizerModelDataConverter().load(list);
					for (Map.Entry<String, FeatureBorder> entry : model.data.entrySet()) {
						out.collect(entry.getValue());
					}
				}
			}
		);
}
 
Example 6
Source File: HadoopReduceFunctionITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);

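	// wrap the Hadoop Reducer so it runs as a Flink GroupReduceFunction over the whole DataSet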
	DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
			reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	commentCnts.writeAsText(resultPath);
	env.execute();

	String expected = "(42,15)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 7
Source File: QuantileDiscretizerTrainBatchOp.java    From Alink with Apache License 2.0
@Override
public QuantileDiscretizerTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
	BatchOperator<?> in = checkAndGetFirst(inputs);
	if (getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS) && getParams().contains(
		QuantileDiscretizerTrainParams.NUM_BUCKETS_ARRAY)) {
		throw new RuntimeException("It can not set num_buckets and num_buckets_array at the same time.");
	}

	String[] quantileColNames = getSelectedCols();

	int[] quantileNum = null;

	if (getParams().contains(QuantileDiscretizerTrainParams.NUM_BUCKETS)) {
		quantileNum = new int[quantileColNames.length];
		Arrays.fill(quantileNum, getNumBuckets());
	} else {
		quantileNum = Arrays.stream(getNumBucketsArray()).mapToInt(Integer::intValue).toArray();
	}

	/* filter the selected columns from the input */
	DataSet<Row> input = Preprocessing.select(in, quantileColNames).getDataSet();

	DataSet<Row> quantile = quantile(
		input, quantileNum,
		getParams().get(HasRoundMode.ROUND_MODE),
		getParams().get(Preprocessing.ZERO_AS_MISSING)
	);

	quantile = quantile.reduceGroup(
		new SerializeModel(
			getParams(),
			quantileColNames,
			TableUtil.findColTypesWithAssertAndHint(in.getSchema(), quantileColNames),
			BinTypes.BinDivideType.QUANTILE
		)
	);

	/* set output */
	setOutput(quantile, new QuantileDiscretizerModelDataConverter().getModelSchema());

	return this;
}
 