org.apache.flink.api.java.operators.IterativeDataSet Java Examples

The following examples show how to use org.apache.flink.api.java.operators.IterativeDataSet. They are taken from open source projects; the source file and project license are noted above each example.
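
All of the snippets below share the same bulk-iteration pattern: DataSet#iterate(n) opens a loop that runs for at most n supersteps, the step function is built from ordinary transformations of the loop head, and IterativeDataSet#closeWith(...) feeds the step result back and closes the loop. The following minimal, self-contained sketch shows the pattern in isolation (the class name BulkIterationSketch is ours, not part of Flink):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class BulkIterationSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// iterate(5) opens a bulk iteration that runs at most 5 supersteps
		IterativeDataSet<Long> loop = env.generateSequence(1, 10).iterate(5);

		// the step function: any transformation of the loop head
		DataSet<Long> step = loop.map(new MapFunction<Long, Long>() {
			@Override
			public Long map(Long value) {
				return value + 1;
			}
		});

		// closeWith(step) feeds the step result back as the next partial solution;
		// an optional second argument supplies a termination-criterion data set
		loop.closeWith(step).output(new DiscardingOutputFormat<Long>());

		env.execute();
	}
}

The examples that follow exercise variations of this pattern: termination criteria, aggregators and convergence criteria, broadcast sets into the loop body, and nested iterations.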
Example #1
Source File: IterationCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testIdentityIteration() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);
		
		IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
		iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #2
Source File: AggregatorsITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDistributedCacheWithIterations() throws Exception {
	final String testString = "Et tu, Brute?";
	final String testName = "testing_caesar";

	final File folder = tempFolder.newFolder();
	final File resultFile = new File(folder, UUID.randomUUID().toString());

	String testPath = resultFile.toString();
	String resultPath = resultFile.toURI().toString();

	File tempFile = new File(testPath);
	try (FileWriter writer = new FileWriter(tempFile)) {
		writer.write(testString);
	}

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.registerCachedFile(resultPath, testName);

	IterativeDataSet<Long> solution = env.fromElements(1L).iterate(2);
	solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() {
		@Override
		public void open(Configuration parameters) throws Exception {
			File file = getRuntimeContext().getDistributedCache().getFile(testName);
			// try-with-resources closes the reader even if the assertion fails;
			// expected value first: the cached file must contain the test string
			try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
				assertEquals(testString, reader.readLine());
			}
		}

		@Override
		public boolean filter(Long value) throws Exception {
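			// drop every element; this test only verifies the cached file in open()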
			return false;
		}
	}).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>());
	env.execute();
}
 
Example #3
Source File: IterationWithAllReducerITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<String> initialInput = env.fromElements("1", "1", "1", "1", "1", "1", "1", "1");

	IterativeDataSet<String> iteration = initialInput.iterate(5).name("Loop");

	DataSet<String> sumReduce = iteration.reduce(new ReduceFunction<String>() {
		@Override
		public String reduce(String value1, String value2) throws Exception {
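			// despite the operator name below, this keeps only the first value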
			return value1;
		}
	}).name("Compute sum (Reduce)");

	List<String> result = iteration.closeWith(sumReduce).collect();

	compareResultAsText(result, EXPECTED);
}
 
Example #4
Source File: CollectionExecutionIterationTest.java    From flink with Apache License 2.0
@Test
public void testBulkIteration() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

		IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(10);

		DataSet<Integer> result = iteration.closeWith(iteration.map(new AddSuperstepNumberMapper()));

		List<Integer> collected = new ArrayList<Integer>();
		result.output(new LocalCollectionOutputFormat<Integer>(collected));

		env.execute();

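		// AddSuperstepNumberMapper adds the superstep number each round: 1 + (1 + 2 + ... + 10) = 56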
		assertEquals(1, collected.size());
		assertEquals(56, collected.get(0).intValue());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #5
Source File: IterationIncompleteDynamicPathConsumptionITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// the test data is constructed such that the merge join zig zag
	// has an early out, leaving elements on the dynamic path input unconsumed

	DataSet<Path> edges = env.fromElements(
			new Path(1, 2),
			new Path(1, 4),
			new Path(3, 6),
			new Path(3, 8),
			new Path(1, 10),
			new Path(1, 12),
			new Path(3, 14),
			new Path(3, 16),
			new Path(1, 18),
			new Path(1, 20));

	IterativeDataSet<Path> currentPaths = edges.iterate(10);

	DataSet<Path> newPaths = currentPaths
			.join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
				.with(new PathConnector())
			.union(currentPaths).distinct("from", "to");

	DataSet<Path> result = currentPaths.closeWith(newPaths);

	result.output(new DiscardingOutputFormat<Path>());

	env.execute();
}
 
Example #6
Source File: KMeansForTest.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	if (args.length < 3) {
		throw new IllegalArgumentException("Missing parameters");
	}

	final String pointsData = args[0];
	final String centersData = args[1];
	final int numIterations = Integer.parseInt(args[2]);

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// get input data
	DataSet<Point> points = env.fromElements(pointsData.split("\n"))
			.map(new TuplePointConverter());

	DataSet<Centroid> centroids = env.fromElements(centersData.split("\n"))
			.map(new TupleCentroidConverter());

	// set number of bulk iterations for KMeans algorithm
	IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

	DataSet<Centroid> newCentroids = points
			// compute closest centroid for each point
			.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")

			// count and sum point coordinates for each centroid (test pojo return type)
			.map(new CountAppender())

			// !test if key expressions are working!
			.groupBy("field0").reduce(new CentroidAccumulator())

			// compute new centroids from point counts and coordinate sums
			.map(new CentroidAverager());

	// feed new centroids back into next iteration
	DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

	// test that custom data type collects are working
	finalCentroids.collect();
}
 
Example #7
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |      \         /-------< >---sink
 * src-map     |     join------/         |
 *   \         |      /                  |
 *    \        +-----/-------------------+
 *     \            /
 *      \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(100);

		DataSet<Long> source = env.generateSequence(1, 1000000);

		DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

		DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

		IterativeDataSet<Long> iteration = mapped.iterate(10);
		iteration.closeWith(
				iteration.join(reduced)
						.where(new IdentityKeyExtractor<Long>())
						.equalTo(new IdentityKeyExtractor<Long>())
						.with(new DummyFlatJoinFunction<Long>()))
				.output(new DiscardingOutputFormat<Long>());

		compileNoStats(env.createProgramPlan());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #8
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testMultipleIterations() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(100);
	
	DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
	
	DataSet<String> reduced = input
			.map(new IdentityMapper<String>())
			.reduceGroup(new Top1GroupReducer<String>());
		
	IterativeDataSet<String> iteration1 = input.iterate(100);
	IterativeDataSet<String> iteration2 = input.iterate(20);
	IterativeDataSet<String> iteration3 = input.iterate(17);
	
	iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
			.output(new DiscardingOutputFormat<String>());
	iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
			.output(new DiscardingOutputFormat<String>());
	iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
			.output(new DiscardingOutputFormat<String>());
	
	Plan plan = env.createProgramPlan();
	
	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #9
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test to ensure that sourceA is inside as well as outside of the iteration the same
 * node.
 *
 * <pre>
 *       (SRC A)               (SRC B)
 *      /       \             /       \
 *  (SINK 1)   (ITERATION)    |     (SINK 2)
 *             /        \     /
 *         (SINK 3)     (CROSS => NEXT PARTIAL SOLUTION)
 * </pre>
 */
@Test
public void testClosure() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> sourceA = env.generateSequence(0,1);
	DataSet<Long> sourceB = env.generateSequence(0,1);

	sourceA.output(new DiscardingOutputFormat<Long>());
	sourceB.output(new DiscardingOutputFormat<Long>());

	IterativeDataSet<Long> loopHead = sourceA.iterate(10).name("Loop");

	DataSet<Long> loopTail = loopHead.cross(sourceB).with(new IdentityCrosser<Long>());
	DataSet<Long> loopRes = loopHead.closeWith(loopTail);

	loopRes.output(new DiscardingOutputFormat<Long>());

	Plan plan = env.createProgramPlan();

	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #10
Source File: AggregatorConvergenceITCase.java    From flink with Apache License 2.0
@Test
public void testConnectedComponentsWithParametrizableConvergence() throws Exception {

	// name of the aggregator that checks for convergence
	final String updatedElements = "updated.elements.aggr";

	// the iteration stops if less than this number of elements change value
	final long convergenceThreshold = 3;

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
	DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

	IterativeDataSet<Tuple2<Long, Long>> iteration = initialSolutionSet.iterate(10);

	// register the convergence criterion
	iteration.registerAggregationConvergenceCriterion(updatedElements,
		new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold));

	DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.join(edges).where(0).equalTo(0)
		.with(new NeighborWithComponentIDJoin())
		.groupBy(0).min(1);

	DataSet<Tuple2<Long, Long>> updatedComponentId =
		verticesWithNewComponents.join(iteration).where(0).equalTo(0)
			.flatMap(new MinimumIdFilter(updatedElements));

	List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId).collect();
	Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

	assertEquals(expectedResult, result);
}
 
Example #11
Source File: NestedIterationsTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBulkIterationInClosure() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Long> data1 = env.generateSequence(1, 100);
		DataSet<Long> data2 = env.generateSequence(1, 100);
		
		IterativeDataSet<Long> firstIteration = data1.iterate(100);
		
		DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));
		
		
		IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
		
		DataSet<Long> joined = mainIteration.join(firstResult)
				.where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
				.with(new DummyFlatJoinFunction<Long>());
		
		DataSet<Long> mainResult = mainIteration.closeWith(joined);
		
		mainResult.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		
		// optimizer should be able to translate this
		OptimizedPlan op = compileNoStats(p);
		
		// job graph generator should be able to translate this
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #12
Source File: IterationsCompilerTest.java    From Flink-CEPplus with Apache License 2.0
public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

	// open a bulk iteration
	IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

	DataSet<Tuple2<Long, Long>> changes = iteration
			.join(edges).where(0).equalTo(0)
			.flatMap(new FlatMapJoin());

	// close the bulk iteration
	return iteration.closeWith(changes);
}
 
Example #13
Source File: IterationIncompleteStaticPathConsumptionITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// the test data is constructed such that the merge join zig zag
	// has an early out, leaving elements on the static path input unconsumed

	DataSet<Path> edges = env.fromElements(
			new Path(2, 1),
			new Path(4, 1),
			new Path(6, 3),
			new Path(8, 3),
			new Path(10, 1),
			new Path(12, 1),
			new Path(14, 3),
			new Path(16, 3),
			new Path(18, 1),
			new Path(20, 1));

	IterativeDataSet<Path> currentPaths = edges.iterate(10);

	DataSet<Path> newPaths = currentPaths
			.join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
				.with(new PathConnector())
			.union(currentPaths).distinct("from", "to");

	DataSet<Path> result = currentPaths.closeWith(newPaths);

	result.output(new DiscardingOutputFormat<Path>());

	env.execute();
}
 
Example #14
Source File: IdentityIterationITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	IterativeDataSet<Long> iteration = env.generateSequence(1, 10).iterate(100);
	iteration.closeWith(iteration)
		.output(new LocalCollectionOutputFormat<Long>(result));

	env.execute();
}
 
Example #15
Source File: AccumulatorIterativeITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(NUM_SUBTASKS);

	IterativeDataSet<Integer> iteration = env.fromElements(1, 2, 3).iterate(NUM_ITERATIONS);

	iteration.closeWith(iteration.reduceGroup(new SumReducer())).output(new DiscardingOutputFormat<Integer>());

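	// SumReducer (not shown) adds the group sum 1 + 2 + 3 = 6 to the ACC_NAME accumulator once per superstep, hence NUM_ITERATIONS * 6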
	Assert.assertEquals(NUM_ITERATIONS * 6, (int) env.execute().getAccumulatorResult(ACC_NAME));
}
 
Example #16
Source File: AggregatorsITCase.java    From flink with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterate() throws Exception {
	/*
	 * Test aggregator without parameter for iterate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
	IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

	// register aggregator
	LongSumAggregator aggr = new LongSumAggregator();
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	// register convergence criterion
	iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
			new NegativeElementsConvergenceCriterion());

	DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
	List<Integer> result = iteration.closeWith(updatedDs).collect();
	Collections.sort(result);

	List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

	assertEquals(expected, result);
}
 
Example #17
Source File: AggregatorsITCase.java    From flink with Apache License 2.0
@Test
public void testConvergenceCriterionWithParameterForIterate() throws Exception {
	/*
	 * Test convergence criterion with parameter for iterate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
	IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

	// register aggregator
	LongSumAggregator aggr = new LongSumAggregator();
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	// register convergence criterion
	iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
			new NegativeElementsConvergenceCriterionWithParam(3));

	DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
	List<Integer> result = iteration.closeWith(updatedDs).collect();
	Collections.sort(result);

	List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

	assertEquals(expected, result);
}
 
Example #18
Source File: CollectionExecutionIterationTest.java    From flink with Apache License 2.0
@Test
public void testBulkIterationWithTerminationCriterion() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

		IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(100);

		DataSet<Integer> iterationResult = iteration.map(new AddSuperstepNumberMapper());

		DataSet<Integer> terminationCriterion = iterationResult.filter(new FilterFunction<Integer>() {
			public boolean filter(Integer value) {
				return value < 50;
			}
		});

		List<Integer> collected = new ArrayList<Integer>();

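		// the loop stops early once the termination criterion data set becomes empty (no values < 50 remain)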
		iteration.closeWith(iterationResult, terminationCriterion)
				.output(new LocalCollectionOutputFormat<Integer>(collected));

		env.execute();

		assertEquals(1, collected.size());
		assertEquals(56, collected.get(0).intValue());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #19
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
public static DataSet<Tuple2<Long, Long>> doBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {
	
	// open a bulk iteration
	IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);
	
	DataSet<Tuple2<Long, Long>> changes = iteration
			.join(edges).where(0).equalTo(0).with(new Join222())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration).where(0).equalTo(0)
			.flatMap(new FlatMapJoin());
	
	// close the bulk iteration
	return iteration.closeWith(changes);
}
 
Example #20
Source File: AggregatorsITCase.java    From flink with Apache License 2.0
@Test
public void testAggregatorWithParameterForIterate() throws Exception {
	/*
	 * Test aggregator with parameter for iterate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
	IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

	// register aggregator
	LongSumAggregatorWithParameter aggr = new LongSumAggregatorWithParameter(0);
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	// register convergence criterion
	iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
			new NegativeElementsConvergenceCriterion());

	DataSet<Integer> updatedDs = iteration.map(new SubtractOneMapWithParam());
	List<Integer> result = iteration.closeWith(updatedDs).collect();
	Collections.sort(result);

	List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

	assertEquals(expected, result);
}
 
Example #21
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
@Test
public void testMultipleIterationsWithClosueBCVars() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(100);

	DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
		
	IterativeDataSet<String> iteration1 = input.iterate(100);
	IterativeDataSet<String> iteration2 = input.iterate(20);
	IterativeDataSet<String> iteration3 = input.iterate(17);
	
	
	iteration1.closeWith(iteration1.map(new IdentityMapper<String>()))
			.output(new DiscardingOutputFormat<String>());
	iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());
	iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());
	
	Plan plan = env.createProgramPlan();
	
	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}