org.apache.flink.api.java.operators.IterativeDataSet Java Examples
The following examples show how to use
org.apache.flink.api.java.operators.IterativeDataSet.
Each example names the project and source file it was taken from.
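All of the examples follow the same basic pattern: DataSet#iterate(int) opens a bulk iteration and returns an IterativeDataSet, the loop body is built from ordinary DataSet transformations on that handle, and closeWith(...) feeds the step result back into the next superstep and returns the DataSet that leaves the loop. The minimal sketch below illustrates that pattern; the class name and the incrementing map function are illustrative only and are not taken from any of the listed test files.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class BulkIterationSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // open a bulk iteration that runs at most 10 supersteps
        IterativeDataSet<Long> loop = env.fromElements(0L).iterate(10);

        // the step function: any DataSet transformation of the partial solution
        DataSet<Long> stepResult = loop.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value + 1;
            }
        });

        // closeWith feeds stepResult back into the next superstep and
        // returns the DataSet holding the final iteration result
        DataSet<Long> result = loop.closeWith(stepResult);

        // printing triggers execution; after 10 supersteps the value is 10
        result.print();
    }
}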
Example #1
Source File: IterationCompilerTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testIdentityIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
        iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #2
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0

@Test
public void testDistributedCacheWithIterations() throws Exception {
    final String testString = "Et tu, Brute?";
    final String testName = "testing_caesar";

    final File folder = tempFolder.newFolder();
    final File resultFile = new File(folder, UUID.randomUUID().toString());

    String testPath = resultFile.toString();
    String resultPath = resultFile.toURI().toString();

    File tempFile = new File(testPath);
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write(testString);
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(resultPath, testName);

    IterativeDataSet<Long> solution = env.fromElements(1L).iterate(2);
    solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile(testName);
            BufferedReader reader = new BufferedReader(new FileReader(file));
            String output = reader.readLine();
            reader.close();
            assertEquals(output, testString);
        }

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    }).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>());

    env.execute();
}
Example #3
Source File: IterationWithAllReducerITCase.java From Flink-CEPplus with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<String> initialInput = env.fromElements("1", "1", "1", "1", "1", "1", "1", "1");

    IterativeDataSet<String> iteration = initialInput.iterate(5).name("Loop");

    DataSet<String> sumReduce = iteration.reduce(new ReduceFunction<String>() {
        @Override
        public String reduce(String value1, String value2) throws Exception {
            return value1;
        }
    }).name("Compute sum (Reduce)");

    List<String> result = iteration.closeWith(sumReduce).collect();

    compareResultAsText(result, EXPECTED);
}
Example #4
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0

@Test
public void testBulkIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(10);

        DataSet<Integer> result = iteration.closeWith(iteration.map(new AddSuperstepNumberMapper()));

        List<Integer> collected = new ArrayList<Integer>();
        result.output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #5
Source File: CollectionExecutionIterationTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testBulkIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(10);

        DataSet<Integer> result = iteration.closeWith(iteration.map(new AddSuperstepNumberMapper()));

        List<Integer> collected = new ArrayList<Integer>();
        result.output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #6
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0

@Test
public void testBulkIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(10);

        DataSet<Integer> result = iteration.closeWith(iteration.map(new AddSuperstepNumberMapper()));

        List<Integer> collected = new ArrayList<Integer>();
        result.output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #7
Source File: IterationIncompleteDynamicPathConsumptionITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the dynamic path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(1, 2), new Path(1, 4),
            new Path(3, 6), new Path(3, 8),
            new Path(1, 10), new Path(1, 12),
            new Path(3, 14), new Path(3, 16),
            new Path(1, 18), new Path(1, 20));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #8
Source File: KMeansForTest.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        throw new IllegalArgumentException("Missing parameters");
    }

    final String pointsData = args[0];
    final String centersData = args[1];
    final int numIterations = Integer.parseInt(args[2]);

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<Point> points = env.fromElements(pointsData.split("\n"))
            .map(new TuplePointConverter());

    DataSet<Centroid> centroids = env.fromElements(centersData.split("\n"))
            .map(new TupleCentroidConverter());

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

    DataSet<Centroid> newCentroids = points
            // compute closest centroid for each point
            .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
            // count and sum point coordinates for each centroid (test pojo return type)
            .map(new CountAppender())
            // !test if key expressions are working!
            .groupBy("field0").reduce(new CentroidAccumulator())
            // compute new centroids from point counts and coordinate sums
            .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    // test that custom data type collects are working
    finalCentroids.collect();
}
Example #9
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |      \      /-------< >---sink
 * src-map     |       join------/        |
 *  \          |      /                   |
 *   \         +-----/-------------------+
 *    \             /
 *     \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> source = env.generateSequence(1, 1000000);

        DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

        DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

        IterativeDataSet<Long> iteration = mapped.iterate(10);
        iteration.closeWith(
                iteration.join(reduced)
                        .where(new IdentityKeyExtractor<Long>())
                        .equalTo(new IdentityKeyExtractor<Long>())
                        .with(new DummyFlatJoinFunction<Long>()))
                .output(new DiscardingOutputFormat<Long>());

        compileNoStats(env.createProgramPlan());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testDistributedCacheWithIterations() throws Exception {
    final String testString = "Et tu, Brute?";
    final String testName = "testing_caesar";

    final File folder = tempFolder.newFolder();
    final File resultFile = new File(folder, UUID.randomUUID().toString());

    String testPath = resultFile.toString();
    String resultPath = resultFile.toURI().toString();

    File tempFile = new File(testPath);
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write(testString);
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(resultPath, testName);

    IterativeDataSet<Long> solution = env.fromElements(1L).iterate(2);
    solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile(testName);
            BufferedReader reader = new BufferedReader(new FileReader(file));
            String output = reader.readLine();
            reader.close();
            assertEquals(output, testString);
        }

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    }).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>());

    env.execute();
}
Example #11
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testMultipleIterations() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    DataSet<String> reduced = input
            .map(new IdentityMapper<String>())
            .reduceGroup(new Top1GroupReducer<String>());

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
            .output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
            .output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #12
Source File: IterationIncompleteDynamicPathConsumptionITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the dynamic path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(1, 2), new Path(1, 4),
            new Path(3, 6), new Path(3, 8),
            new Path(1, 10), new Path(1, 12),
            new Path(3, 14), new Path(3, 16),
            new Path(1, 18), new Path(1, 20));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #13
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0

/**
 * Test to ensure that sourceA is inside as well as outside of the iteration the same
 * node.
 *
 * <pre>
 *       (SRC A)              (SRC B)
 *      /       \            /       \
 * (SINK 1)   (ITERATION)    |     (SINK 2)
 *             /     \       /
 *        (SINK 3)  (CROSS => NEXT PARTIAL SOLUTION)
 * </pre>
 */
@Test
public void testClosure() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> sourceA = env.generateSequence(0, 1);
    DataSet<Long> sourceB = env.generateSequence(0, 1);

    sourceA.output(new DiscardingOutputFormat<Long>());
    sourceB.output(new DiscardingOutputFormat<Long>());

    IterativeDataSet<Long> loopHead = sourceA.iterate(10).name("Loop");

    DataSet<Long> loopTail = loopHead.cross(sourceB).with(new IdentityCrosser<Long>());

    DataSet<Long> loopRes = loopHead.closeWith(loopTail);

    loopRes.output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #14
Source File: AggregatorConvergenceITCase.java From flink with Apache License 2.0

@Test
public void testConnectedComponentsWithParametrizableConvergence() throws Exception {
    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    IterativeDataSet<Tuple2<Long, Long>> iteration = initialSolutionSet.iterate(10);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements, new LongSumAggregator(),
            new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId =
            verticesWithNewComponents.join(iteration).where(0).equalTo(0)
                    .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
Example #15
Source File: NestedIterationsTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);

        IterativeDataSet<Long> firstIteration = data1.iterate(100);

        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));

        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);

        DataSet<Long> joined = mainIteration.join(firstResult)
                .where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
                .with(new DummyFlatJoinFunction<Long>());

        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #16
Source File: IterationsCompilerTest.java From Flink-CEPplus with Apache License 2.0

public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(DataSet<Tuple2<Long, Long>> vertices,
        DataSet<Tuple2<Long, Long>> edges) {

    // open a bulk iteration
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    // close the bulk iteration
    return iteration.closeWith(changes);
}
Example #17
Source File: IterationIncompleteStaticPathConsumptionITCase.java From Flink-CEPplus with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the static path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(2, 1), new Path(4, 1),
            new Path(6, 3), new Path(8, 3),
            new Path(10, 1), new Path(12, 1),
            new Path(14, 3), new Path(16, 3),
            new Path(18, 1), new Path(20, 1));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #18
Source File: IdentityIterationITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    IterativeDataSet<Long> iteration = env.generateSequence(1, 10).iterate(100);
    iteration.closeWith(iteration)
            .output(new LocalCollectionOutputFormat<Long>(result));

    env.execute();
}
Example #19
Source File: AccumulatorIterativeITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(NUM_SUBTASKS);

    IterativeDataSet<Integer> iteration = env.fromElements(1, 2, 3).iterate(NUM_ITERATIONS);

    iteration.closeWith(iteration.reduceGroup(new SumReducer())).output(new DiscardingOutputFormat<Integer>());

    Assert.assertEquals(NUM_ITERATIONS * 6, (int) env.execute().getAccumulatorResult(ACC_NAME));
}
Example #20
Source File: IterationIncompleteStaticPathConsumptionITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the static path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(2, 1), new Path(4, 1),
            new Path(6, 3), new Path(8, 3),
            new Path(10, 1), new Path(12, 1),
            new Path(14, 3), new Path(16, 3),
            new Path(18, 1), new Path(20, 1));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #21
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(DataSet<Tuple2<Long, Long>> vertices,
        DataSet<Tuple2<Long, Long>> edges) {

    // open a bulk iteration
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    // close the bulk iteration
    return iteration.closeWith(changes);
}
Example #22
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testDistributedCacheWithIterations() throws Exception {
    final String testString = "Et tu, Brute?";
    final String testName = "testing_caesar";

    final File folder = tempFolder.newFolder();
    final File resultFile = new File(folder, UUID.randomUUID().toString());

    String testPath = resultFile.toString();
    String resultPath = resultFile.toURI().toString();

    File tempFile = new File(testPath);
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write(testString);
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(resultPath, testName);

    IterativeDataSet<Long> solution = env.fromElements(1L).iterate(2);
    solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile(testName);
            BufferedReader reader = new BufferedReader(new FileReader(file));
            String output = reader.readLine();
            reader.close();
            assertEquals(output, testString);
        }

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    }).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>());

    env.execute();
}
Example #23
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testAggregatorWithoutParameterForIterate() throws Exception {
    /*
     * Test aggregator without parameter for iterate
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());

    DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
    List<Integer> result = iteration.closeWith(updatedDs).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #24
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testConvergenceCriterionWithParameterForIterate() throws Exception {
    /*
     * Test convergence criterion with parameter for iterate
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
    List<Integer> result = iteration.closeWith(updatedDs).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #25
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0

@Test
public void testBulkIterationWithTerminationCriterion() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(100);

        DataSet<Integer> iterationResult = iteration.map(new AddSuperstepNumberMapper());

        DataSet<Integer> terminationCriterion = iterationResult.filter(new FilterFunction<Integer>() {
            public boolean filter(Integer value) {
                return value < 50;
            }
        });

        List<Integer> collected = new ArrayList<Integer>();

        iteration.closeWith(iterationResult, terminationCriterion)
                .output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #26
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

public static DataSet<Tuple2<Long, Long>> doBulkIteration(DataSet<Tuple2<Long, Long>> vertices,
        DataSet<Tuple2<Long, Long>> edges) {

    // open a bulk iteration
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0).with(new Join222())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    // close the bulk iteration
    return iteration.closeWith(changes);
}
Example #27
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testAggregatorWithParameterForIterate() throws Exception {
    /*
     * Test aggregator with parameter for iterate
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

    // register aggregator
    LongSumAggregatorWithParameter aggr = new LongSumAggregatorWithParameter(0);
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());

    DataSet<Integer> updatedDs = iteration.map(new SubtractOneMapWithParam());
    List<Integer> result = iteration.closeWith(updatedDs).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #28
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |      \      /-------< >---sink
 * src-map     |       join------/        |
 *  \          |      /                   |
 *   \         +-----/-------------------+
 *    \             /
 *     \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> source = env.generateSequence(1, 1000000);

        DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

        DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

        IterativeDataSet<Long> iteration = mapped.iterate(10);
        iteration.closeWith(
                iteration.join(reduced)
                        .where(new IdentityKeyExtractor<Long>())
                        .equalTo(new IdentityKeyExtractor<Long>())
                        .with(new DummyFlatJoinFunction<Long>()))
                .output(new DiscardingOutputFormat<Long>());

        compileNoStats(env.createProgramPlan());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #29
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testMultipleIterations() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    DataSet<String> reduced = input
            .map(new IdentityMapper<String>())
            .reduceGroup(new Top1GroupReducer<String>());

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
            .output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
            .output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #30
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testMultipleIterationsWithClosueBCVars() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()))
            .output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()))
            .output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}