Java Code Examples for org.apache.flink.api.java.DataSet#groupBy()

The following examples show how to use org.apache.flink.api.java.DataSet#groupBy(). You can vote up the examples you find useful or vote down the ones you don't, and follow the links above each example to visit the original project or source file. You may also check out the related API usage on the sidebar.
Example 1
Source File: GroupingTest.java    From flink with Apache License 2.0 7 votes vote down vote up
@Test
@SuppressWarnings("serial")
public void testGroupByKeySelector3() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	this.customTypeData.add(new CustomType());

	try {
		DataSet<CustomType> customDs = env.fromCollection(customTypeData);
		// NOTE(review): the try/catch + Assert.fail() below asserts that this
		// call SUCCEEDS, so grouping by a KeySelector that returns the whole
		// CustomType is expected to be accepted here. The original comment
		// ("should not work") contradicted the test's own assertion structure.
		customDs.groupBy(
				new KeySelector<GroupingTest.CustomType, CustomType>() {
					@Override
					public CustomType getKey(CustomType value) {
						return value;
					}
				});
	} catch (Exception e) {
		Assert.fail();
	}
}
 
Example 2
Source File: ReduceWithCombinerITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testForkingReduceOnKeyedDataset() throws Exception {

	// Runs two different combiner-backed group reducers over the same keyed
	// grouping and verifies both produce identical per-key aggregates.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// keyed test data distributed evenly across the downstream tasks
	DataSet<Tuple3<String, Integer, Boolean>> source = createKeyedInput(env);
	UnsortedGrouping<Tuple3<String, Integer, Boolean>> grouped = source.groupBy(0);

	DataSet<Tuple3<String, Integer, Boolean>> combReduced = grouped.reduceGroup(new KeyedCombReducer());
	DataSet<Tuple3<String, Integer, Boolean>> groupCombReduced = grouped.reduceGroup(new KeyedGroupCombReducer());

	List<Tuple3<String, Integer, Boolean>> result = combReduced.union(groupCombReduced).collect();
	String expected = "k1,6,true\n"
		+ "k2,4,true\n"
		+ "k1,6,true\n"
		+ "k2,4,true\n";
	compareResultAsTuples(result, expected);
}
 
Example 3
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test(expected = InvalidProgramException.class)
@SuppressWarnings("serial")
public void testGroupByKeySelector5() {

	// A KeySelector whose key type is CustomType2 must be rejected with
	// an InvalidProgramException (declared via the expected attribute).
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	customTypeData.add(new CustomType());

	DataSet<CustomType> dataSet = env.fromCollection(customTypeData);
	KeySelector<GroupingTest.CustomType, CustomType2> invalidSelector =
			new KeySelector<GroupingTest.CustomType, CustomType2>() {
				@Override
				public CustomType2 getKey(CustomType value) {
					return new CustomType2();
				}
			};
	// expected to fail
	dataSet.groupBy(invalidSelector);
}
 
Example 4
Source File: GroupingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test(expected = InvalidProgramException.class)
@SuppressWarnings("serial")
public void testGroupByKeySelector5() {

	// Grouping by a selector that produces CustomType2 keys must throw
	// InvalidProgramException (checked by the expected attribute).
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	this.customTypeData.add(new CustomType());

	DataSet<CustomType> customSet = environment.fromCollection(customTypeData);
	// expected to fail
	customSet.groupBy(new KeySelector<GroupingTest.CustomType, CustomType2>() {
		@Override
		public CustomType2 getKey(CustomType value) {
			return new CustomType2();
		}
	});
}
 
Example 5
Source File: ReduceWithCombinerITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testForkingReduceOnKeyedDatasetWithSelection() throws Exception {

	// Same forking-reduce check as the positional-key variant, but the
	// grouping key is derived through a KeySelector instead of a field index.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// keyed test data distributed evenly across the downstream tasks
	DataSet<Tuple3<String, Integer, Boolean>> source = createKeyedInput(env);
	UnsortedGrouping<Tuple3<String, Integer, Boolean>> grouped = source.groupBy(new KeySelectorX());

	DataSet<Tuple3<String, Integer, Boolean>> first = grouped.reduceGroup(new KeyedCombReducer());
	DataSet<Tuple3<String, Integer, Boolean>> second = grouped.reduceGroup(new KeyedGroupCombReducer());

	List<Tuple3<String, Integer, Boolean>> result = first.union(second).collect();
	String expected = "k1,6,true\n"
		+ "k2,4,true\n"
		+ "k1,6,true\n"
		+ "k2,4,true\n";

	compareResultAsTuples(result, expected);
}
 
Example 6
Source File: GroupingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
@SuppressWarnings("serial")
public void testGroupByKeySelector2() {

	// Grouping by a composite Tuple2 key built in a KeySelector is valid
	// and must not throw.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	this.customTypeData.add(new CustomType());

	try {
		DataSet<CustomType> dataSet = env.fromCollection(customTypeData);
		// valid composite key: (myInt, myLong)
		dataSet.groupBy(new KeySelector<GroupingTest.CustomType, Tuple2<Integer, Long>>() {
			@Override
			public Tuple2<Integer, Long> getKey(CustomType value) {
				return new Tuple2<Integer, Long>(value.myInt, value.myLong);
			}
		});
	} catch (Exception e) {
		Assert.fail();
	}
}
 
Example 7
Source File: GroupingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
@SuppressWarnings("serial")
public void testGroupByKeySelector1() {

	// Grouping by a single Long key extracted through a KeySelector is valid
	// and must not throw.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	this.customTypeData.add(new CustomType());

	try {
		DataSet<CustomType> dataSet = env.fromCollection(customTypeData);
		// valid key: the long field of the POJO
		dataSet.groupBy(new KeySelector<GroupingTest.CustomType, Long>() {
			@Override
			public Long getKey(CustomType value) {
				return value.myLong;
			}
		});
	} catch (Exception e) {
		Assert.fail();
	}
}
 
Example 8
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
@SuppressWarnings("serial")
public void testGroupByKeySelector1() {

	// A KeySelector returning a basic Long key must be accepted by groupBy.
	final ExecutionEnvironment executionEnv = ExecutionEnvironment.getExecutionEnvironment();
	this.customTypeData.add(new CustomType());

	try {
		DataSet<CustomType> pojoSet = executionEnv.fromCollection(customTypeData);
		KeySelector<GroupingTest.CustomType, Long> longKey =
				new KeySelector<GroupingTest.CustomType, Long>() {
					@Override
					public Long getKey(CustomType value) {
						return value.myLong;
					}
				};
		// valid grouping key
		pojoSet.groupBy(longKey);
	} catch (Exception e) {
		Assert.fail();
	}
}
 
Example 9
Source File: ReduceWithCombinerITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testForkingReduceOnKeyedDataset() throws Exception {

	// Two combinable group-reduce implementations over the same grouping
	// must yield identical per-key aggregates.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	// keyed input spread evenly over the available downstream tasks
	DataSet<Tuple3<String, Integer, Boolean>> keyedInput = createKeyedInput(env);
	UnsortedGrouping<Tuple3<String, Integer, Boolean>> byKey = keyedInput.groupBy(0);

	DataSet<Tuple3<String, Integer, Boolean>> viaCombReducer = byKey.reduceGroup(new KeyedCombReducer());
	DataSet<Tuple3<String, Integer, Boolean>> viaGroupCombReducer = byKey.reduceGroup(new KeyedGroupCombReducer());

	List<Tuple3<String, Integer, Boolean>> union = viaCombReducer.union(viaGroupCombReducer).collect();
	compareResultAsTuples(union,
		"k1,6,true\n" + "k2,4,true\n" + "k1,6,true\n" + "k2,4,true\n");
}
 
Example 10
Source File: GroupingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid1() {
	// Combining the "*" wildcard with a named field on an atomic type
	// must raise InvalidProgramException.
	final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> integers = execEnv.fromElements(0, 1, 2, 3);

	integers.groupBy("*", "invalidField");
}
 
Example 11
Source File: GroupingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid2() {
	// Grouping an atomic Integer dataset by a field name must raise
	// InvalidProgramException.
	final ExecutionEnvironment execEnv = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> integers = execEnv.fromElements(0, 1, 2, 3);

	integers.groupBy("invalidField");
}
 
Example 12
Source File: GroupingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test(expected = IllegalArgumentException.class)
public void testGroupByKeyExpressions4() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<CustomType> ds = env.fromCollection(customTypeData);

	// should not work: "myNonExistent" is not a field of CustomType.
	// (The original comment said "key out of tuple bounds", which describes
	// the tuple-index tests, not this field-expression test.)
	ds.groupBy("myNonExistent");
}
 
Example 13
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test(expected = InvalidProgramException.class)
public void testGroupByKeyExpressions2() {

	// Field-expression keys are not applicable to a basic (non-composite)
	// type such as Long; this must raise InvalidProgramException.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> longs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO);
	// expected to fail: field expression on a basic type
	longs.groupBy("myInt");
}
 
Example 14
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid1() {
	// Mixing the "*" wildcard with a named field on an atomic type is
	// expected to raise InvalidProgramException.
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> numbers = environment.fromElements(0, 1, 2, 3);

	numbers.groupBy("*", "invalidField");
}
 
Example 15
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test(expected = IndexOutOfBoundsException.class)
public void testGroupByKeyFields5() {

	// A negative tuple field position must raise IndexOutOfBoundsException.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tuples =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	// expected to fail: negative field index
	tuples.groupBy(-1);
}
 
Example 16
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test(expected = IndexOutOfBoundsException.class)
public void testGroupByKeyFields4() {

	// Field index 5 is past the last position of a Tuple5 (valid: 0..4)
	// and must raise IndexOutOfBoundsException.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tuples =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	// expected to fail: index beyond tuple arity
	tuples.groupBy(5);
}
 
Example 17
Source File: GroupingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid2() {
	// A field-name key on an atomic Integer dataset is expected to raise
	// InvalidProgramException.
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Integer> numbers = environment.fromElements(0, 1, 2, 3);

	numbers.groupBy("invalidField");
}
 
Example 18
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test(expected = InvalidProgramException.class)
public void testGroupAtomicTypeWithInvalid3() {
	// Grouping a DataSet of ArrayList elements by the "*" wildcard is
	// expected to raise InvalidProgramException.
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<ArrayList<Integer>> lists = environment.fromElements(new ArrayList<Integer>());

	lists.groupBy("*");
}
 
Example 19
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
private DataSet<Tuple> translateGroupBy(DataSet<Tuple> input, FlowNode node, int dop) {

	// Translates a Cascading GroupBy node into the corresponding Flink
	// DataSet grouping (optionally with a secondary group sort) and applies
	// the node's GroupByReducer at the requested degree of parallelism (dop).

	GroupBy groupBy = (GroupBy) node.getSourceElements().iterator().next();

	Scope outScope = getOutScope(node);
	List<Scope> inScopes = getInputScopes(node, groupBy);

	// the output schema depends on whether an Every follows the GroupBy
	Fields outFields;
	if (outScope.isEvery()) {
		outFields = outScope.getOutGroupingFields();
	}
	else {
		outFields = outScope.getOutValuesFields();
	}
	registerKryoTypes(outFields);

	// get input scope
	Scope inScope = inScopes.get(0);

	// resolve grouping keys and (optional) group-sorting keys for this input
	Fields groupKeyFields = groupBy.getKeySelectors().get(inScope.getName());
	Fields sortKeyFields = groupBy.getSortingSelectors().get(inScope.getName());

	String[] groupKeys = registerKeyFields(input, groupKeyFields);
	String[] sortKeys = null;
	if (sortKeyFields != null) {
		sortKeys = registerKeyFields(input, sortKeyFields);
	}
	Order sortOrder = groupBy.isSortReversed() ? Order.DESCENDING : Order.ASCENDING;

	if (sortOrder == Order.DESCENDING) {
		// translate groupBy with inverse sort order
		return translateInverseSortedGroupBy(input, node, dop, groupKeys, sortKeys, outFields);
	}
	else if (groupKeys == null || groupKeys.length == 0) {
		// translate key-less (global) groupBy
		return translateGlobalGroupBy(input, node, dop, sortKeys, sortOrder, outFields);
	}
	else {

		UnsortedGrouping<Tuple> grouping = input
				.groupBy(groupKeys);

		if (sortKeys != null && sortKeys.length > 0) {
			// translate groupBy with group sorting.
			// BUGFIX: this branch is only reached when sortOrder == ASCENDING
			// (descending is handled by translateInverseSortedGroupBy above),
			// so ALL secondary sort keys must be ascending. The original code
			// sorted keys 1..n with Order.DESCENDING, contradicting key 0.
			SortedGrouping<Tuple> sortedGrouping = grouping
					.sortGroup(sortKeys[0], Order.ASCENDING);
			for (int i = 1; i < sortKeys.length; i++) {
				sortedGrouping = sortedGrouping
						.sortGroup(sortKeys[i], Order.ASCENDING);
			}

			return sortedGrouping
					.reduceGroup(new GroupByReducer(node))
					.returns(new TupleTypeInfo(outFields))
					.withParameters(this.getFlinkNodeConfig(node))
					.setParallelism(dop)
					.name("reduce-" + node.getID());
		}
		else {
			// translate groupBy without group sorting

			return grouping
					.reduceGroup(new GroupByReducer(node))
					.returns(new TupleTypeInfo(outFields))
					.withParameters(this.getFlinkNodeConfig(node))
					.setParallelism(dop)
					.name("reduce-" + node.getID());
		}
	}
}
 
Example 20
Source File: GroupingTest.java    From Flink-CEPplus with Apache License 2.0 3 votes vote down vote up
@Test(expected = InvalidProgramException.class)
public void testGroupByKeyExpressions3() {

	// A positional (tuple-index) key on a custom POJO type is expected to
	// raise InvalidProgramException.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	this.customTypeData.add(new CustomType());

	DataSet<CustomType> pojoSet = env.fromCollection(customTypeData);
	// expected to fail: integer field position on a non-tuple type
	pojoSet.groupBy(0);
}