Java Code Examples for org.apache.flink.api.java.DataSet#collect()

The following examples show how to use org.apache.flink.api.java.DataSet#collect(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: OuterJoinITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testJoinWithTupleReturningKeySelectors() throws Exception {
	/*
	 * Full outer join with a user-defined join function, where both sides
	 * are keyed by key selectors that return tuples.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.getSmall5TupleDataSet(env);

	DataSet<Tuple2<String, String>> joined = left
			.fullOuterJoin(right)
			.where(new KeySelector3()) // 0, 1
			.equalTo(new KeySelector4()) // 0, 4
			.with(new T3T5FlatJoin());
	List<Tuple2<String, String>> actual = joined.collect();

	String expectedRows =
			"Hi,Hallo\n"
			+ "Hello,Hallo Welt\n"
			+ "Hello world,null\n"
			+ "null,Hallo Welt wie\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 2
Source File: GraphOperationsITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testTriplets() throws Exception {
	/*
	 * Collects the result of getTriplets() and compares it with the expected rows.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	List<Triplet<Long, Long, Long>> triplets = graph.getTriplets().collect();

	expectedResult =
		"1,2,1,2,12\n" + "1,3,1,3,13\n"
		+ "2,3,2,3,23\n" + "3,4,3,4,34\n"
		+ "3,5,3,5,35\n" + "4,5,4,5,45\n"
		+ "5,1,5,1,51\n";

	compareResultAsTuples(triplets, expectedResult);
}
 
Example 3
Source File: FilterITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testFilterOnIntegerTupleField() throws Exception {
	/*
	 * Applies Filter4 to the 3-tuple data set (filter on the Integer tuple field)
	 * and checks the surviving rows.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple3<Integer, Long, String>> actual = input.filter(new Filter4()).collect();

	String expectedRows =
			"2,2,Hello\n"
			+ "4,3,Hello world, how are you?\n"
			+ "6,3,Luke Skywalker\n"
			+ "8,4,Comment#2\n"
			+ "10,4,Comment#4\n"
			+ "12,5,Comment#6\n"
			+ "14,5,Comment#8\n"
			+ "16,6,Comment#10\n"
			+ "18,6,Comment#12\n"
			+ "20,6,Comment#14\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 4
Source File: ReduceOnNeighborMethodsITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testSumOfAllNeighborsNoValue() throws Exception {
	/*
	 * For each vertex, get the sum of the values of all its neighbors
	 * (EdgeDirection.ALL).
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	List<Tuple2<Long, Long>> neighborSums =
		graph.reduceOnNeighbors(new SumNeighbors(), EdgeDirection.ALL).collect();

	expectedResult =
		"1,10\n"
		+ "2,4\n"
		+ "3,12\n"
		+ "4,8\n"
		+ "5,8\n";

	compareResultAsTuples(neighborSums, expectedResult);
}
 
Example 5
Source File: GroupReduceITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testCorrectnessOfGroupreduceWithDescendingGroupSort() throws Exception {
	/*
	 * groupReduce over groups keyed on field 1, with each group sorted
	 * on field 2 in descending order.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.sortGroup(2, Order.DESCENDING)
			.reduceGroup(new Tuple3SortedGroupReduce());

	List<Tuple3<Integer, Long, String>> actual = reduced.collect();

	String expectedRows =
			"1,1,Hi\n"
			+ "5,2,Hello world-Hello\n"
			+ "15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n"
			+ "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n"
			+ "65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n"
			+ "111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 6
Source File: ReduceOnNeighborMethodsITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testSumOfInNeighbors() throws Exception {
	/*
	 * For each vertex, get the sum of the in-neighbor values
	 * times the edge weights (EdgeDirection.IN).
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	DataSet<Tuple2<Long, Long>> inNeighborSums =
		graph.groupReduceOnNeighbors(new SumInNeighbors(), EdgeDirection.IN);
	List<Tuple2<Long, Long>> actual = inNeighborSums.collect();

	expectedResult =
		"1,255\n"
		+ "2,12\n"
		+ "3,59\n"
		+ "4,102\n"
		+ "5,285\n";

	compareResultAsTuples(actual, expectedResult);
}
 
Example 7
Source File: ExecutionEnvironmentITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a custom configuration object handed to the LocalEnvironment is picked up.
 */
@Test
public void testLocalEnvironmentWithConfig() throws Exception {
	Configuration customConf = new Configuration();
	customConf.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, PARALLELISM);

	final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(customConf);
	env.setParallelism(ExecutionConfig.PARALLELISM_AUTO_MAX);
	env.getConfig().disableSysoutLogging();

	// Emits the index of the subtask in which mapPartition is invoked.
	RichMapPartitionFunction<Integer, Integer> emitSubtaskIndex =
			new RichMapPartitionFunction<Integer, Integer>() {
				@Override
				public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
					out.collect(getRuntimeContext().getIndexOfThisSubtask());
				}
			};

	DataSet<Integer> subtaskIndices = env
			.createInput(new ParallelismDependentInputFormat())
			.rebalance()
			.mapPartition(emitSubtaskIndex);

	List<Integer> collected = subtaskIndices.collect();
	assertEquals(PARALLELISM, collected.size());
}
 
Example 8
Source File: JoinWithVerticesITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testWithLessElements() throws Exception {
	/*
	 * joinWithVertices where the DataSet passed as a parameter has fewer
	 * elements than the vertex DataSet (only the first 3 vertices), but the
	 * same type.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(
		TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env),
		env);

	Graph<Long, Long, Long> joinedGraph = graph.joinWithVertices(
		graph.getVertices().first(3).map(new VertexToTuple2Map<>()),
		new AddValuesMapper());

	List<Vertex<Long, Long>> vertices = joinedGraph.getVertices().collect();

	expectedResult =
		"1,2\n"
		+ "2,4\n"
		+ "3,6\n"
		+ "4,4\n"
		+ "5,5\n";

	compareResultAsTuples(vertices, expectedResult);
}
 
Example 9
Source File: GroupReduceITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testInputOfCombinerIsSortedForCombinableGroupReduceWithGroupSorting() throws Exception {
	/*
	 * For a combinable groupReduce with group sorting, the input of the
	 * combiner must be sorted as well.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.sortGroup(0, Order.ASCENDING)
			.reduceGroup(new OrderCheckingCombinableReduce());

	List<Tuple3<Integer, Long, String>> actual = reduced.collect();

	String expectedRows =
			"1,1,Hi\n"
			+ "2,2,Hello\n"
			+ "4,3,Hello world, how are you?\n"
			+ "7,4,Comment#1\n"
			+ "11,5,Comment#5\n"
			+ "16,6,Comment#10\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 10
Source File: ReduceITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testReduceOnCustomTypeWithKeyExtractor() throws Exception {
	/*
	 * Reduce on a custom type, grouped through a key extractor (KeySelector2).
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<CustomType> reduced = input
			.groupBy(new KeySelector2())
			.reduce(new CustomTypeReduce());

	List<CustomType> actual = reduced.collect();

	String expectedRows =
			"1,0,Hi\n"
			+ "2,3,Hello!\n"
			+ "3,12,Hello!\n"
			+ "4,30,Hello!\n"
			+ "5,60,Hello!\n"
			+ "6,105,Hello!\n";

	compareResultAsText(actual, expectedRows);
}
 
Example 11
Source File: GroupReduceITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testCorrectnessOfGroupReduceOnTuplesWithKeyFieldSelectorAndGroupSorting() throws Exception {
	/*
	 * groupReduce on tuples grouped by key field 1, with each group sorted
	 * on field 2 in ascending order.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduced = input
			.groupBy(1)
			.sortGroup(2, Order.ASCENDING)
			.reduceGroup(new Tuple3SortedGroupReduce());

	List<Tuple3<Integer, Long, String>> actual = reduced.collect();

	String expectedRows =
			"1,1,Hi\n"
			+ "5,2,Hello-Hello world\n"
			+ "15,3,Hello world, how are you?-I am fine.-Luke Skywalker\n"
			+ "34,4,Comment#1-Comment#2-Comment#3-Comment#4\n"
			+ "65,5,Comment#5-Comment#6-Comment#7-Comment#8-Comment#9\n"
			+ "111,6,Comment#10-Comment#11-Comment#12-Comment#13-Comment#14-Comment#15\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 12
Source File: ScatterGatherConfigurationITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testIterationDefaultDirection() throws Exception {
	/*
	 * When no direction parameter is given, the iteration should work as before
	 * (i.e. it collects messages from the in-neighbors and sends them to the
	 * out-neighbors).
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, HashSet<Long>, Long> graph = Graph
		.fromCollection(TestGraphUtils.getLongLongVertices(), TestGraphUtils.getLongLongEdges(), env)
		.mapVertices(new InitialiseHashSetMapper());

	// Scatter-gather iteration with a maximum of 5 iterations and no configuration.
	DataSet<Vertex<Long, HashSet<Long>>> updatedVertices = graph
		.runScatterGatherIteration(new IdMessengerTrg(), new VertexUpdateDirection(), 5)
		.getVertices();
	List<Vertex<Long, HashSet<Long>>> actual = updatedVertices.collect();

	expectedResult =
		"1,[5]\n"
		+ "2,[1]\n"
		+ "3,[1, 2]\n"
		+ "4,[3]\n"
		+ "5,[3, 4]";

	compareResultAsTuples(actual, expectedResult);
}
 
Example 13
Source File: DistinctITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testCorrectnessOfDistinctOnAtomic() throws Exception {
	/*
	 * distinct() applied to a data set of Integers.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> input = CollectionDataSets.getIntegerDataSet(env);
	List<Integer> actual = input.distinct().collect();

	String expectedRows = "1\n2\n3\n4\n5";

	compareResultAsText(actual, expectedRows);
}
 
Example 14
Source File: OuterJoinITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testNestedIntoTuple() throws Exception {
	/*
	 * Full outer join of a POJO data set with a Tuple7 data set, where the POJO
	 * side is keyed on nested field expressions.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<POJO> pojos = CollectionDataSets.getSmallPojoDataSet(env);
	DataSet<Tuple7<Integer, String, Integer, Integer, Long, String, Long>> tuples =
			CollectionDataSets.getSmallTuplebasedDataSet(env);

	DataSet<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> joined = pojos
			.fullOuterJoin(tuples)
			.where("nestedPojo.longNumber", "number", "nestedTupleWithCustom.f0")
			.equalTo("f6", "f0", "f2")
			.with(new ProjectBothFunction<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>());

	env.setParallelism(1);
	List<Tuple2<POJO, Tuple7<Integer, String, Integer, Integer, Long, String, Long>>> actual = joined.collect();

	String expectedRows =
			"1 First (10,100,1000,One) 10000,(1,First,10,100,1000,One,10000)\n"
			+ "2 Second (20,200,2000,Two) 20000,(2,Second,20,200,2000,Two,20000)\n"
			+ "3 Third (30,300,3000,Three) 30000,(3,Third,30,300,3000,Three,30000)\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 15
Source File: JoinITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testNonPojoToVerifyFullTupleKeys() throws Exception {
	/*
	 * Non-POJO test checking that full-tuple keys work: field 0 of the left
	 * input is joined against "f0.f0", "f0.f1" of the right input.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Tuple2<Integer, Integer>, String>> left = CollectionDataSets.getSmallNestedTupleDataSet(env);
	DataSet<Tuple2<Tuple2<Integer, Integer>, String>> right = CollectionDataSets.getSmallNestedTupleDataSet(env);

	// key is now Tuple2<Integer, Integer>
	DataSet<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> joined =
			left.join(right)
					.where(0)
					.equalTo("f0.f0", "f0.f1");

	env.setParallelism(1);
	List<Tuple2<Tuple2<Tuple2<Integer, Integer>, String>, Tuple2<Tuple2<Integer, Integer>, String>>> actual =
			joined.collect();

	String expectedRows =
			"((1,1),one),((1,1),one)\n"
			+ "((2,2),two),((2,2),two)\n"
			+ "((3,3),three),((3,3),three)\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 16
Source File: JoinITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testNonPojoToVerifyNestedTupleElementSelectionWithFirstKeyFieldGreaterThanZero()
		throws Exception {
	/*
	 * Non-POJO test for "nested" tuple-element selection where the first key
	 * field is greater than 0 (the left key is "f1.f0").
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> base = CollectionDataSets.getSmall3TupleDataSet(env);

	// Self-join on field 0 yields pairs of identical tuples.
	DataSet<Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>> pairs =
			base.join(base).where(0).equalTo(0);

	DataSet<Tuple2<Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>, Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>>> joined =
			pairs.join(pairs)
					.where("f1.f0")
					.equalTo("f0.f0");

	env.setParallelism(1);
	List<Tuple2<Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>, Tuple2<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>>> actual =
			joined.collect();

	String expectedRows =
			"((1,1,Hi),(1,1,Hi)),((1,1,Hi),(1,1,Hi))\n"
			+ "((2,2,Hello),(2,2,Hello)),((2,2,Hello),(2,2,Hello))\n"
			+ "((3,2,Hello world),(3,2,Hello world)),((3,2,Hello world),(3,2,Hello world))\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 17
Source File: UnionITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testUnion2IdenticalDataSets() throws Exception {
	/*
	 * Union of two data sets with the same content.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> first = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> second = CollectionDataSets.get3TupleDataSet(env);

	List<Tuple3<Integer, Long, String>> actual = first.union(second).collect();

	// Every row of the full 3-tuple set is expected twice.
	String expectedRows = FULL_TUPLE_3_STRING + FULL_TUPLE_3_STRING;

	compareResultAsTuples(actual, expectedRows);
}
 
Example 18
Source File: MapITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testTypeConversionMapperCustomToTuple() throws Exception {
	/*
	 * Type conversion mapper (Custom -> Tuple): Mapper3 maps CustomType
	 * elements to 3-tuples.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> input = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> mapped = input.map(new Mapper3());

	List<Tuple3<Integer, Long, String>> actual = mapped.collect();

	String expectedRows =
			"1,0,Hi\n"
			+ "2,1,Hello\n"
			+ "2,2,Hello world\n"
			+ "3,3,Hello world, how are you?\n"
			+ "3,4,I am fine.\n"
			+ "3,5,Luke Skywalker\n"
			+ "4,6,Comment#1\n"
			+ "4,7,Comment#2\n"
			+ "4,8,Comment#3\n"
			+ "4,9,Comment#4\n"
			+ "5,10,Comment#5\n"
			+ "5,11,Comment#6\n"
			+ "5,12,Comment#7\n"
			+ "5,13,Comment#8\n"
			+ "5,14,Comment#9\n"
			+ "6,15,Comment#10\n"
			+ "6,16,Comment#11\n"
			+ "6,17,Comment#12\n"
			+ "6,18,Comment#13\n"
			+ "6,19,Comment#14\n"
			+ "6,20,Comment#15\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 19
Source File: JoinITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testProjectOnATuple1Input() throws Exception {
	/*
	 * Project join on a tuple input 1: the output tuple is assembled from
	 * alternating projectFirst / projectSecond calls.
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> left = CollectionDataSets.getSmall3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> right = CollectionDataSets.get5TupleDataSet(env);

	// The declared target type is kept on purpose: the projection's output type is inferred from it.
	DataSet<Tuple6<String, Long, String, Integer, Long, Long>> projected = left
			.join(right)
			.where(1)
			.equalTo(1)
			.projectFirst(2, 1)
			.projectSecond(3)
			.projectFirst(0)
			.projectSecond(4, 1);

	List<Tuple6<String, Long, String, Integer, Long, Long>> actual = projected.collect();

	String expectedRows =
			"Hi,1,Hallo,1,1,1\n"
			+ "Hello,2,Hallo Welt,2,2,2\n"
			+ "Hello world,2,Hallo Welt,3,2,2\n";

	compareResultAsTuples(actual, expectedRows);
}
 
Example 20
Source File: OrcTableSourceITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testScanWithProjectionAndFilter() throws Exception {
	// Runs an aggregating SQL query with a WHERE clause against a registered ORC table source.
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);

	OrcTableSource tableSource = OrcTableSource.builder()
		.path(getPath(TEST_FILE_FLAT))
		.forOrcSchema(TEST_SCHEMA_FLAT)
		.build();
	tEnv.registerTableSource("OrcTable", tableSource);

	String sql =
		"SELECT "
			+ "MIN(_col4), MAX(_col4), "
			+ "MIN(_col3), MAX(_col3), "
			+ "MIN(_col0), MAX(_col0), "
			+ "MIN(_col2), MAX(_col2), "
			+ "COUNT(*) "
			+ "FROM OrcTable "
			+ "WHERE (_col0 BETWEEN 4975 and 5024 OR _col0 BETWEEN 9975 AND 10024) AND _col1 = 'F'";
	Table queryResult = tEnv.sqlQuery(sql);

	List<Row> rows = tEnv.toDataSet(queryResult, Row.class).collect();

	// The query aggregates without grouping, so exactly one row is expected.
	assertEquals(1, rows.size());
	assertEquals(
		"1500,6000,2 yr Degree,Unknown,4976,10024,D,W,50",
		rows.get(0).toString());
}