org.apache.flink.api.java.operators.IterativeDataSet Java Examples
The following examples show how to use
org.apache.flink.api.java.operators.IterativeDataSet.
Each example names the project and source file it was taken from.
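All of the examples follow the same basic pattern: DataSet#iterate(int) opens a bulk iteration and returns an IterativeDataSet, the loop body is built from ordinary DataSet transformations on that handle, and closeWith(...) feeds the step result back into the next superstep and returns the DataSet that leaves the loop. The minimal sketch below illustrates that pattern; the class name and the incrementing map function are illustrative only and are not taken from any of the listed test files.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class BulkIterationSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // open a bulk iteration that runs at most 10 supersteps
        IterativeDataSet<Long> loop = env.fromElements(0L).iterate(10);

        // the step function: any DataSet transformation of the partial solution
        DataSet<Long> stepResult = loop.map(new MapFunction<Long, Long>() {
            @Override
            public Long map(Long value) {
                return value + 1;
            }
        });

        // closeWith feeds stepResult back into the next superstep and
        // returns the DataSet holding the final iteration result
        DataSet<Long> result = loop.closeWith(stepResult);

        // printing triggers execution; after 10 supersteps the value is 10
        result.print();
    }
}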
Example #1
Source File: IterationCompilerTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testIdentityIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
        iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #2
Source File: AggregatorsITCase.java From Flink-CEPplus with Apache License 2.0

@Test
public void testDistributedCacheWithIterations() throws Exception {
    final String testString = "Et tu, Brute?";
    final String testName = "testing_caesar";

    final File folder = tempFolder.newFolder();
    final File resultFile = new File(folder, UUID.randomUUID().toString());

    String testPath = resultFile.toString();
    String resultPath = resultFile.toURI().toString();

    File tempFile = new File(testPath);
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write(testString);
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(resultPath, testName);

    IterativeDataSet<Long> solution = env.fromElements(1L).iterate(2);
    solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile(testName);
            BufferedReader reader = new BufferedReader(new FileReader(file));
            String output = reader.readLine();
            reader.close();
            assertEquals(output, testString);
        }

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    }).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>());

    env.execute();
}
Example #3
Source File: IterationWithAllReducerITCase.java From Flink-CEPplus with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataSet<String> initialInput = env.fromElements("1", "1", "1", "1", "1", "1", "1", "1");

    IterativeDataSet<String> iteration = initialInput.iterate(5).name("Loop");

    DataSet<String> sumReduce = iteration.reduce(new ReduceFunction<String>() {
        @Override
        public String reduce(String value1, String value2) throws Exception {
            return value1;
        }
    }).name("Compute sum (Reduce)");

    List<String> result = iteration.closeWith(sumReduce).collect();

    compareResultAsText(result, EXPECTED);
}
Example #4
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0

@Test
public void testBulkIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(10);

        DataSet<Integer> result = iteration.closeWith(iteration.map(new AddSuperstepNumberMapper()));

        List<Integer> collected = new ArrayList<Integer>();
        result.output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #5
Source File: CollectionExecutionIterationTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testBulkIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(10);

        DataSet<Integer> result = iteration.closeWith(iteration.map(new AddSuperstepNumberMapper()));

        List<Integer> collected = new ArrayList<Integer>();
        result.output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #6
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0

@Test
public void testBulkIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(10);

        DataSet<Integer> result = iteration.closeWith(iteration.map(new AddSuperstepNumberMapper()));

        List<Integer> collected = new ArrayList<Integer>();
        result.output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #7
Source File: IterationIncompleteDynamicPathConsumptionITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the dynamic path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(1, 2), new Path(1, 4),
            new Path(3, 6), new Path(3, 8),
            new Path(1, 10), new Path(1, 12),
            new Path(3, 14), new Path(3, 16),
            new Path(1, 18), new Path(1, 20));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #8
Source File: KMeansForTest.java From flink with Apache License 2.0

public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        throw new IllegalArgumentException("Missing parameters");
    }

    final String pointsData = args[0];
    final String centersData = args[1];
    final int numIterations = Integer.parseInt(args[2]);

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // get input data
    DataSet<Point> points = env.fromElements(pointsData.split("\n"))
            .map(new TuplePointConverter());

    DataSet<Centroid> centroids = env.fromElements(centersData.split("\n"))
            .map(new TupleCentroidConverter());

    // set number of bulk iterations for KMeans algorithm
    IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

    DataSet<Centroid> newCentroids = points
            // compute closest centroid for each point
            .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
            // count and sum point coordinates for each centroid (test pojo return type)
            .map(new CountAppender())
            // !test if key expressions are working!
            .groupBy("field0").reduce(new CentroidAccumulator())
            // compute new centroids from point counts and coordinate sums
            .map(new CentroidAverager());

    // feed new centroids back into next iteration
    DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

    // test that custom data type collects are working
    finalCentroids.collect();
}
Example #9
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |      \      /-------< >---sink
 * src-map     |       join------/        |
 *  \          |      /                   |
 *   \         +-----/-------------------+
 *    \             /
 *     \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> source = env.generateSequence(1, 1000000);

        DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

        DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

        IterativeDataSet<Long> iteration = mapped.iterate(10);
        iteration.closeWith(
                iteration.join(reduced)
                        .where(new IdentityKeyExtractor<Long>())
                        .equalTo(new IdentityKeyExtractor<Long>())
                        .with(new DummyFlatJoinFunction<Long>()))
                .output(new DiscardingOutputFormat<Long>());

        compileNoStats(env.createProgramPlan());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #10
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testDistributedCacheWithIterations() throws Exception {
    final String testString = "Et tu, Brute?";
    final String testName = "testing_caesar";

    final File folder = tempFolder.newFolder();
    final File resultFile = new File(folder, UUID.randomUUID().toString());

    String testPath = resultFile.toString();
    String resultPath = resultFile.toURI().toString();

    File tempFile = new File(testPath);
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write(testString);
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(resultPath, testName);

    IterativeDataSet<Long> solution = env.fromElements(1L).iterate(2);
    solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile(testName);
            BufferedReader reader = new BufferedReader(new FileReader(file));
            String output = reader.readLine();
            reader.close();
            assertEquals(output, testString);
        }

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    }).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>());

    env.execute();
}
Example #11
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testMultipleIterations() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    DataSet<String> reduced = input
            .map(new IdentityMapper<String>())
            .reduceGroup(new Top1GroupReducer<String>());

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
            .output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
            .output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #12
Source File: IterationIncompleteDynamicPathConsumptionITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the dynamic path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(1, 2), new Path(1, 4),
            new Path(3, 6), new Path(3, 8),
            new Path(1, 10), new Path(1, 12),
            new Path(3, 14), new Path(3, 16),
            new Path(1, 18), new Path(1, 20));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #13
Source File: BranchingPlansCompilerTest.java From Flink-CEPplus with Apache License 2.0

/**
 * Test to ensure that sourceA is inside as well as outside of the iteration the same
 * node.
 *
 * <pre>
 *       (SRC A)              (SRC B)
 *      /       \            /       \
 * (SINK 1)   (ITERATION)    |     (SINK 2)
 *             /     \       /
 *        (SINK 3)  (CROSS => NEXT PARTIAL SOLUTION)
 * </pre>
 */
@Test
public void testClosure() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> sourceA = env.generateSequence(0, 1);
    DataSet<Long> sourceB = env.generateSequence(0, 1);

    sourceA.output(new DiscardingOutputFormat<Long>());
    sourceB.output(new DiscardingOutputFormat<Long>());

    IterativeDataSet<Long> loopHead = sourceA.iterate(10).name("Loop");

    DataSet<Long> loopTail = loopHead.cross(sourceB).with(new IdentityCrosser<Long>());

    DataSet<Long> loopRes = loopHead.closeWith(loopTail);

    loopRes.output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #14
Source File: AggregatorConvergenceITCase.java From flink with Apache License 2.0

@Test
public void testConnectedComponentsWithParametrizableConvergence() throws Exception {
    // name of the aggregator that checks for convergence
    final String updatedElements = "updated.elements.aggr";

    // the iteration stops if less than this number of elements change value
    final long convergenceThreshold = 3;

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
    DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

    IterativeDataSet<Tuple2<Long, Long>> iteration = initialSolutionSet.iterate(10);

    // register the convergence criterion
    iteration.registerAggregationConvergenceCriterion(updatedElements, new LongSumAggregator(),
            new UpdatedElementsConvergenceCriterion(convergenceThreshold));

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.join(edges).where(0).equalTo(0)
            .with(new NeighborWithComponentIDJoin())
            .groupBy(0).min(1);

    DataSet<Tuple2<Long, Long>> updatedComponentId =
            verticesWithNewComponents.join(iteration).where(0).equalTo(0)
                    .flatMap(new MinimumIdFilter(updatedElements));

    List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId).collect();
    Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

    assertEquals(expectedResult, result);
}
Example #15
Source File: NestedIterationsTest.java From Flink-CEPplus with Apache License 2.0

@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);

        IterativeDataSet<Long> firstIteration = data1.iterate(100);

        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));

        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);

        DataSet<Long> joined = mainIteration.join(firstResult)
                .where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
                .with(new DummyFlatJoinFunction<Long>());

        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #16
Source File: IterationsCompilerTest.java From Flink-CEPplus with Apache License 2.0

public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(DataSet<Tuple2<Long, Long>> vertices,
        DataSet<Tuple2<Long, Long>> edges) {

    // open a bulk iteration
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    // close the bulk iteration
    return iteration.closeWith(changes);
}
Example #17
Source File: IterationIncompleteStaticPathConsumptionITCase.java From Flink-CEPplus with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the static path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(2, 1), new Path(4, 1),
            new Path(6, 3), new Path(8, 3),
            new Path(10, 1), new Path(12, 1),
            new Path(14, 3), new Path(16, 3),
            new Path(18, 1), new Path(20, 1));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #18
Source File: IdentityIterationITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    IterativeDataSet<Long> iteration = env.generateSequence(1, 10).iterate(100);
    iteration.closeWith(iteration)
            .output(new LocalCollectionOutputFormat<Long>(result));

    env.execute();
}
Example #19
Source File: AccumulatorIterativeITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(NUM_SUBTASKS);

    IterativeDataSet<Integer> iteration = env.fromElements(1, 2, 3).iterate(NUM_ITERATIONS);

    iteration.closeWith(iteration.reduceGroup(new SumReducer())).output(new DiscardingOutputFormat<Integer>());

    Assert.assertEquals(NUM_ITERATIONS * 6, (int) env.execute().getAccumulatorResult(ACC_NAME));
}
Example #20
Source File: IterationIncompleteStaticPathConsumptionITCase.java From flink with Apache License 2.0

@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // the test data is constructed such that the merge join zig zag
    // has an early out, leaving elements on the static path input unconsumed
    DataSet<Path> edges = env.fromElements(
            new Path(2, 1), new Path(4, 1),
            new Path(6, 3), new Path(8, 3),
            new Path(10, 1), new Path(12, 1),
            new Path(14, 3), new Path(16, 3),
            new Path(18, 1), new Path(20, 1));

    IterativeDataSet<Path> currentPaths = edges.iterate(10);

    DataSet<Path> newPaths = currentPaths
            .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
            .with(new PathConnector())
            .union(currentPaths).distinct("from", "to");

    DataSet<Path> result = currentPaths.closeWith(newPaths);

    result.output(new DiscardingOutputFormat<Path>());
    env.execute();
}
Example #21
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(DataSet<Tuple2<Long, Long>> vertices,
        DataSet<Tuple2<Long, Long>> edges) {

    // open a bulk iteration
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    // close the bulk iteration
    return iteration.closeWith(changes);
}
Example #22
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testDistributedCacheWithIterations() throws Exception {
    final String testString = "Et tu, Brute?";
    final String testName = "testing_caesar";

    final File folder = tempFolder.newFolder();
    final File resultFile = new File(folder, UUID.randomUUID().toString());

    String testPath = resultFile.toString();
    String resultPath = resultFile.toURI().toString();

    File tempFile = new File(testPath);
    try (FileWriter writer = new FileWriter(tempFile)) {
        writer.write(testString);
    }

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(resultPath, testName);

    IterativeDataSet<Long> solution = env.fromElements(1L).iterate(2);
    solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() {
        @Override
        public void open(Configuration parameters) throws Exception {
            File file = getRuntimeContext().getDistributedCache().getFile(testName);
            BufferedReader reader = new BufferedReader(new FileReader(file));
            String output = reader.readLine();
            reader.close();
            assertEquals(output, testString);
        }

        @Override
        public boolean filter(Long value) throws Exception {
            return false;
        }
    }).withBroadcastSet(solution, "SOLUTION")).output(new DiscardingOutputFormat<Long>());

    env.execute();
}
Example #23
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testAggregatorWithoutParameterForIterate() throws Exception {
    /*
     * Test aggregator without parameter for iterate
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());

    DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
    List<Integer> result = iteration.closeWith(updatedDs).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #24
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testConvergenceCriterionWithParameterForIterate() throws Exception {
    /*
     * Test convergence criterion with parameter for iterate
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

    // register aggregator
    LongSumAggregator aggr = new LongSumAggregator();
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterionWithParam(3));

    DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
    List<Integer> result = iteration.closeWith(updatedDs).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #25
Source File: CollectionExecutionIterationTest.java From flink with Apache License 2.0

@Test
public void testBulkIterationWithTerminationCriterion() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

        IterativeDataSet<Integer> iteration = env.fromElements(1).iterate(100);

        DataSet<Integer> iterationResult = iteration.map(new AddSuperstepNumberMapper());

        DataSet<Integer> terminationCriterion = iterationResult.filter(new FilterFunction<Integer>() {
            public boolean filter(Integer value) {
                return value < 50;
            }
        });

        List<Integer> collected = new ArrayList<Integer>();

        iteration.closeWith(iterationResult, terminationCriterion)
                .output(new LocalCollectionOutputFormat<Integer>(collected));

        env.execute();

        assertEquals(1, collected.size());
        assertEquals(56, collected.get(0).intValue());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #26
Source File: IterationsCompilerTest.java From flink with Apache License 2.0

public static DataSet<Tuple2<Long, Long>> doBulkIteration(DataSet<Tuple2<Long, Long>> vertices,
        DataSet<Tuple2<Long, Long>> edges) {

    // open a bulk iteration
    IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

    DataSet<Tuple2<Long, Long>> changes = iteration
            .join(edges).where(0).equalTo(0).with(new Join222())
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .join(iteration).where(0).equalTo(0)
            .flatMap(new FlatMapJoin());

    // close the bulk iteration
    return iteration.closeWith(changes);
}
Example #27
Source File: AggregatorsITCase.java From flink with Apache License 2.0

@Test
public void testAggregatorWithParameterForIterate() throws Exception {
    /*
     * Test aggregator with parameter for iterate
     */

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);

    DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
    IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

    // register aggregator
    LongSumAggregatorWithParameter aggr = new LongSumAggregatorWithParameter(0);
    iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

    // register convergence criterion
    iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
            new NegativeElementsConvergenceCriterion());

    DataSet<Integer> updatedDs = iteration.map(new SubtractOneMapWithParam());
    List<Integer> result = iteration.closeWith(updatedDs).collect();
    Collections.sort(result);

    List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

    assertEquals(expected, result);
}
Example #28
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |      \      /-------< >---sink
 * src-map     |       join------/        |
 *  \          |      /                   |
 *   \         +-----/-------------------+
 *    \             /
 *     \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> source = env.generateSequence(1, 1000000);

        DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

        DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

        IterativeDataSet<Long> iteration = mapped.iterate(10);
        iteration.closeWith(
                iteration.join(reduced)
                        .where(new IdentityKeyExtractor<Long>())
                        .equalTo(new IdentityKeyExtractor<Long>())
                        .with(new DummyFlatJoinFunction<Long>()))
                .output(new DiscardingOutputFormat<Long>());

        compileNoStats(env.createProgramPlan());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example #29
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testMultipleIterations() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    DataSet<String> reduced = input
            .map(new IdentityMapper<String>())
            .reduceGroup(new Top1GroupReducer<String>());

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
            .output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
            .output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Example #30
Source File: BranchingPlansCompilerTest.java From flink with Apache License 2.0

@Test
public void testMultipleIterationsWithClosueBCVars() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()))
            .output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()))
            .output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}