Java Code Examples for org.apache.flink.api.java.DataSet#iterate()

The following examples show how to use org.apache.flink.api.java.DataSet#iterate(). They are drawn from open-source projects; the source file and originating project are noted above each example.
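As a quick orientation before the examples: DataSet#iterate(int) opens a bulk iteration that applies a step function up to the given number of times, and IterativeDataSet#closeWith(...) feeds the step result back into the loop and returns the final DataSet. Below is a minimal sketch of that pattern; the class name, the incrementing map function, and the element values are illustrative and not taken from any of the projects listed here.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class BulkIterationSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Long> input = env.fromElements(0L, 1L, 2L);

		// open a bulk iteration that runs the step function at most 5 times
		IterativeDataSet<Long> loop = input.iterate(5);

		// step function: increment every element once per iteration
		DataSet<Long> step = loop.map(new MapFunction<Long, Long>() {
			@Override
			public Long map(Long value) {
				return value + 1;
			}
		});

		// feed the step result back and obtain the final result
		DataSet<Long> result = loop.closeWith(step);

		// print() triggers execution of this sketch
		result.print();
	}
}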
Example 1
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
@Test
public void testBranchAfterIteration() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> sourceA = env.generateSequence(0,1);

	IterativeDataSet<Long> loopHead = sourceA.iterate(10);
	DataSet<Long> loopTail = loopHead.map(new IdentityMapper<Long>()).name("Mapper");
	DataSet<Long> loopRes = loopHead.closeWith(loopTail);

	loopRes.output(new DiscardingOutputFormat<Long>());
	loopRes.map(new IdentityMapper<Long>())
			.output(new DiscardingOutputFormat<Long>());

	Plan plan = env.createProgramPlan();

	try {
		compileNoStats(plan);
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 2
Source File: NestedIterationsTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBulkIterationInClosure() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Long> data1 = env.generateSequence(1, 100);
		DataSet<Long> data2 = env.generateSequence(1, 100);
		
		IterativeDataSet<Long> firstIteration = data1.iterate(100);
		
		DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));
		
		
		IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
		
		DataSet<Long> joined = mainIteration.join(firstResult)
				.where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
				.with(new DummyFlatJoinFunction<Long>());
		
		DataSet<Long> mainResult = mainIteration.closeWith(joined);
		
		mainResult.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		
		// optimizer should be able to translate this
		OptimizedPlan op = compileNoStats(p);
		
		// job graph generator should be able to translate this
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 3
Source File: IterationIncompleteStaticPathConsumptionITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// the test data is constructed such that the merge join zig zag
	// has an early out, leaving elements on the static path input unconsumed

	DataSet<Path> edges = env.fromElements(
			new Path(2, 1),
			new Path(4, 1),
			new Path(6, 3),
			new Path(8, 3),
			new Path(10, 1),
			new Path(12, 1),
			new Path(14, 3),
			new Path(16, 3),
			new Path(18, 1),
			new Path(20, 1));

	IterativeDataSet<Path> currentPaths = edges.iterate(10);

	DataSet<Path> newPaths = currentPaths
			.join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
				.with(new PathConnector())
			.union(currentPaths).distinct("from", "to");

	DataSet<Path> result = currentPaths.closeWith(newPaths);

	result.output(new DiscardingOutputFormat<Path>());

	env.execute();
}
 
Example 4
Source File: IterationsCompilerTest.java    From Flink-CEPplus with Apache License 2.0
public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

		// open a bulk iteration
		IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

		DataSet<Tuple2<Long, Long>> changes = iteration
				.join(edges).where(0).equalTo(0)
				.flatMap(new FlatMapJoin());

		// close the bulk iteration
		return iteration.closeWith(changes);
	}
 
Example 5
Source File: AggregatorsITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterate() throws Exception {
	/*
	 * Test aggregator without parameter for iterate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	DataSet<Integer> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env);
	IterativeDataSet<Integer> iteration = initialSolutionSet.iterate(MAX_ITERATIONS);

	// register aggregator
	LongSumAggregator aggr = new LongSumAggregator();
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	// register convergence criterion
	iteration.registerAggregationConvergenceCriterion(NEGATIVE_ELEMENTS_AGGR, aggr,
			new NegativeElementsConvergenceCriterion());

	DataSet<Integer> updatedDs = iteration.map(new SubtractOneMap());
	List<Integer> result = iteration.closeWith(updatedDs).collect();
	Collections.sort(result);

	List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

	assertEquals(expected, result);
}
 
Example 6
Source File: IterationIncompleteDynamicPathConsumptionITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// the test data is constructed such that the merge join zig zag
	// has an early out, leaving elements on the dynamic path input unconsumed

	DataSet<Path> edges = env.fromElements(
			new Path(1, 2),
			new Path(1, 4),
			new Path(3, 6),
			new Path(3, 8),
			new Path(1, 10),
			new Path(1, 12),
			new Path(3, 14),
			new Path(3, 16),
			new Path(1, 18),
			new Path(1, 20));

	IterativeDataSet<Path> currentPaths = edges.iterate(10);

	DataSet<Path> newPaths = currentPaths
			.join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
				.with(new PathConnector())
			.union(currentPaths).distinct("from", "to");

	DataSet<Path> result = currentPaths.closeWith(newPaths);

	result.output(new DiscardingOutputFormat<Path>());

	env.execute();
}
 
Example 7
Source File: IterationIncompleteDynamicPathConsumptionITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// the test data is constructed such that the merge join zig zag
	// has an early out, leaving elements on the dynamic path input unconsumed

	DataSet<Path> edges = env.fromElements(
			new Path(1, 2),
			new Path(1, 4),
			new Path(3, 6),
			new Path(3, 8),
			new Path(1, 10),
			new Path(1, 12),
			new Path(3, 14),
			new Path(3, 16),
			new Path(1, 18),
			new Path(1, 20));

	IterativeDataSet<Path> currentPaths = edges.iterate(10);

	DataSet<Path> newPaths = currentPaths
			.join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from")
				.with(new PathConnector())
			.union(currentPaths).distinct("from", "to");

	DataSet<Path> result = currentPaths.closeWith(newPaths);

	result.output(new DiscardingOutputFormat<Path>());

	env.execute();
}
 
Example 8
Source File: PipelineBreakerTest.java    From flink with Apache License 2.0
/**
 * <pre>
 *                                +----------- ITERATION ---------+
 *                                |                               |
 *                               +--+                           +----+
 *  (source 1) ----------------->|PS| ------------ +        +-->|next|---> (sink)
 *                               +--+              | (BC)   |   +----+
 *                                |                V        |     |
 *  (source 2) --> (map) --+------|-----------> (MAPPER) ---+     |
 *                         |      |                ^              |
 *                         |      |                | (BC)         |
 *                         |      +----------------|--------------+
 *                         |                       |
 *                         +--(map) --> (reduce) --+
 * </pre>
 */
@Test
public void testPipelineBreakerBroadcastedPartialSolution() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
		env.setParallelism(64);
		
		DataSet<Long> initialSource = env.generateSequence(1, 10);
		IterativeDataSet<Long> iteration = initialSource.iterate(100);
		
		
		DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
		
		DataSet<Long> bcInput1 = sourceWithMapper
									.map(new IdentityMapper<Long>())
									.reduce(new SelectOneReducer<Long>());
		
		DataSet<Long> result = sourceWithMapper
				.map(new IdentityMapper<Long>())
						.withBroadcastSet(iteration, "bc2")
						.withBroadcastSet(bcInput1, "bc1");
						
		
		iteration.closeWith(result).output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		BulkIterationPlanNode iterationPlanNode = (BulkIterationPlanNode) sink.getInput().getSource();
		SingleInputPlanNode mapper = (SingleInputPlanNode) iterationPlanNode.getRootOfStepFunction();
		
		assertEquals(TempMode.CACHED, mapper.getInput().getTempMode());
		assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 9
Source File: BulkIterationWithAllReducerITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);

	IterativeDataSet<Integer> iteration = data.iterate(10);

	DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");

	final List<Integer> resultList = new ArrayList<Integer>();
	iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));

	env.execute();

	Assert.assertEquals(8, resultList.get(0).intValue());
}
 
Example 10
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
@Test
public void testResetPartialSolution() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Long> width = env.generateSequence(1, 10);
		DataSet<Long> update = env.generateSequence(1, 10);
		DataSet<Long> lastGradient = env.generateSequence(1, 10);
		
		DataSet<Long> init = width.union(update).union(lastGradient);
		
		IterativeDataSet<Long> iteration = init.iterate(10);
		
		width = iteration.filter(new IdFilter<Long>());
		update = iteration.filter(new IdFilter<Long>());
		lastGradient = iteration.filter(new IdFilter<Long>());
		
		DataSet<Long> gradient = width.map(new IdentityMapper<Long>());
		DataSet<Long> term = gradient.join(lastGradient)
							.where(new IdentityKeyExtractor<Long>())
							.equalTo(new IdentityKeyExtractor<Long>())
							.with(new JoinFunction<Long, Long, Long>() {
								public Long join(Long first, Long second) { return null; }
							});
		
		update = update.map(new RichMapFunction<Long, Long>() {
			public Long map(Long value) { return null; }
		}).withBroadcastSet(term, "some-name");
		
		DataSet<Long> result = iteration.closeWith(width.union(update).union(lastGradient));
		
		result.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 11
Source File: BroadcastVarInitializationITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Integer> data = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);

	IterativeDataSet<Integer> iteration = data.iterate(10);

	DataSet<Integer> result = data.reduceGroup(new PickOneAllReduce()).withBroadcastSet(iteration, "bc");

	final List<Integer> resultList = new ArrayList<Integer>();
	iteration.closeWith(result).output(new LocalCollectionOutputFormat<Integer>(resultList));

	env.execute();

	Assert.assertEquals(8, resultList.get(0).intValue());
}
 
Example 12
Source File: KMeansArbitraryDimension.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {

		if(!parseParameters(args)) {
			return;
		}

		// set up execution environment
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<Point> points = env
			.readTextFile(pointsPath)
			.map(new ConvertToPoint());


		DataSet<Centroid> centroids = env
			.readTextFile(centersPath)
			.map(new ConvertToCentroid());


		// set number of bulk iterations for KMeans algorithm
		IterativeDataSet<Centroid> loop = centroids.iterate(numIterations);

		DataSet<Centroid> newCentroids = points
			// compute closest centroid for each point
			.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
			// count and sum point coordinates for each centroid
			.map(new CountAppender())
			.groupBy(0).reduce(new CentroidAccumulator())
			// compute new centroids from point counts and coordinate sums
			.map(new CentroidAverager());
		// feed new centroids back into next iteration
		DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

		DataSet<Tuple2<Integer, Point>> clusteredPoints = points
			// assign points to final clusters
			.map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

		// emit result
		//clusteredPoints.writeAsCsv(outputPath, "\n", " ", FileSystem.WriteMode.OVERWRITE);
		clusteredPoints.writeAsText(outputPath, FileSystem.WriteMode.OVERWRITE);
		// execute program
		env.setParallelism(dop);
		env.execute("KMeans Multi-Dimension");

	}
 
Example 13
Source File: KMeans.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up execution environment
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setGlobalJobParameters(params); // make parameters available in the web interface

		// get input data:
		// read the points and centroids from the provided paths or fall back to default data
		DataSet<Point> points = getPointDataSet(params, env);
		DataSet<Centroid> centroids = getCentroidDataSet(params, env);

		// set number of bulk iterations for KMeans algorithm
		IterativeDataSet<Centroid> loop = centroids.iterate(params.getInt("iterations", 10));

		DataSet<Centroid> newCentroids = points
			// compute closest centroid for each point
			.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids")
			// count and sum point coordinates for each centroid
			.map(new CountAppender())
			.groupBy(0).reduce(new CentroidAccumulator())
			// compute new centroids from point counts and coordinate sums
			.map(new CentroidAverager());

		// feed new centroids back into next iteration
		DataSet<Centroid> finalCentroids = loop.closeWith(newCentroids);

		DataSet<Tuple2<Integer, Point>> clusteredPoints = points
			// assign points to final clusters
			.map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");

		// emit result
		if (params.has("output")) {
			clusteredPoints.writeAsCsv(params.get("output"), "\n", " ");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute("KMeans Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			clusteredPoints.print();
		}
	}
 
Example 14
Source File: PageRank.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);

		final int numPages = params.getInt("numPages", PageRankData.getNumberOfPages());
		final int maxIterations = params.getInt("iterations", 10);

		// set up execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make the parameters available to the web ui
		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataSet<Long> pagesInput = getPagesDataSet(env, params);
		DataSet<Tuple2<Long, Long>> linksInput = getLinksDataSet(env, params);

		// assign initial rank to pages
		DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.
				map(new RankAssigner((1.0d / numPages)));

		// build adjacency list from link input
		DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
				linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());

		// set iterative data set
		IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(maxIterations);

		DataSet<Tuple2<Long, Double>> newRanks = iteration
				// join pages with outgoing edges and distribute rank
				.join(adjacencyListInput).where(0).equalTo(0).flatMap(new JoinVertexWithEdgesMatch())
				// collect and sum ranks
				.groupBy(0).aggregate(SUM, 1)
				// apply dampening factor
				.map(new Dampener(DAMPENING_FACTOR, numPages));

		DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
				newRanks,
				newRanks.join(iteration).where(0).equalTo(0)
				// termination condition
				.filter(new EpsilonFilter()));

		// emit result
		if (params.has("output")) {
			finalPageRanks.writeAsCsv(params.get("output"), "\n", " ");
			// execute program
			env.execute("Basic Page Rank Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			finalPageRanks.print();
		}
	}
 
Example 15
Source File: LinearRegression.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		final int iterations = params.getInt("iterations", 10);

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get input x data from elements
		DataSet<Data> data;
		if (params.has("input")) {
			// read data from CSV file
			data = env.readCsvFile(params.get("input"))
					.fieldDelimiter(" ")
					.includeFields(true, true)
					.pojoType(Data.class);
		} else {
			System.out.println("Executing LinearRegression example with default input data set.");
			System.out.println("Use --input to specify file input.");
			data = LinearRegressionData.getDefaultDataDataSet(env);
		}

		// get the parameters from elements
		DataSet<Params> parameters = LinearRegressionData.getDefaultParamsDataSet(env);

		// set number of bulk iterations for SGD linear Regression
		IterativeDataSet<Params> loop = parameters.iterate(iterations);

		DataSet<Params> newParameters = data
				// compute a single step using every sample
				.map(new SubUpdate()).withBroadcastSet(loop, "parameters")
				// sum up all the steps
				.reduce(new UpdateAccumulator())
				// average the steps and update all parameters
				.map(new Update());

		// feed new parameters back into next iteration
		DataSet<Params> result = loop.closeWith(newParameters);

		// emit result
		if (params.has("output")) {
			result.writeAsText(params.get("output"));
			// execute program
			env.execute("Linear Regression example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}
	}
 
Example 16
Source File: CachedMatchStrategyCompilerTest.java    From flink with Apache License 2.0
private Plan getTestPlanRightStatic(String strategy) {
	
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	
	DataSet<Tuple3<Long, Long, Long>> bigInput = env.readCsvFile("file://bigFile").types(Long.class, Long.class, Long.class).name("bigFile");
	
	DataSet<Tuple3<Long, Long, Long>> smallInput = env.readCsvFile("file://smallFile").types(Long.class, Long.class, Long.class).name("smallFile");
	
	IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);
	
	Configuration joinStrategy = new Configuration();
	joinStrategy.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);
	
	if(!strategy.equals("")) {
		joinStrategy.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
	}
	
	DataSet<Tuple3<Long, Long, Long>> inner = iteration.join(smallInput).where(0).equalTo(0).with(new DummyJoiner()).name("DummyJoiner").withParameters(joinStrategy);

	DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);
	
	output.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	
	return env.createProgramPlan();
	
}
 
Example 17
Source File: IterationWithUnionITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Integer, CoordVector>> initialInput = env.readFile(new PointInFormat(), this.dataPath).setParallelism(1);

	IterativeDataSet<Tuple2<Integer, CoordVector>> iteration = initialInput.iterate(2);

	DataSet<Tuple2<Integer, CoordVector>> result = iteration.union(iteration).map(new IdentityMapper());

	iteration.closeWith(result).writeAsFormattedText(this.resultPath, new PointFormatter());

	env.execute();
}
 
Example 18
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
@Test
public void testBCVariableClosure() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	
	DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
	
	DataSet<String> reduced = input
			.map(new IdentityMapper<String>())
			.reduceGroup(new Top1GroupReducer<String>());
	
	
	DataSet<String> initialSolution = input.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc");
	
	
	IterativeDataSet<String> iteration = initialSolution.iterate(100);
	
	iteration.closeWith(iteration.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "red"))
			.output(new DiscardingOutputFormat<String>());
	
	Plan plan = env.createProgramPlan();
	
	try{
		compileNoStats(plan);
	}catch(Exception e){
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 19
Source File: BulkIterationTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCorrectTranslation() {
	final String jobName = "Test JobName";

	final int numIterations = 13;

	final int defaultParallelism = 133;
	final int iterationParallelism = 77;

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// ------------ construct the test program ------------------

	{
		env.setParallelism(defaultParallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, Long, String>> initialDataSet = env.fromElements(new Tuple3<>(3.44, 5L, "abc"));

		IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration = initialDataSet.iterate(numIterations);
		bulkIteration.setParallelism(iterationParallelism);

		// test that multiple iteration consumers are supported
		DataSet<Tuple3<Double, Long, String>> identity = bulkIteration
			.map(new IdentityMapper<Tuple3<Double, Long, String>>());

		DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity);

		result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
		result.writeAsText("/dev/null");
	}

	Plan p = env.createProgramPlan(jobName);

	// ------------- validate the plan ----------------

	BulkIterationBase<?> iteration = (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput();

	assertEquals(jobName, p.getJobName());
	assertEquals(defaultParallelism, p.getDefaultParallelism());
	assertEquals(iterationParallelism, iteration.getParallelism());
}
 
Example 20
Source File: PageRankStephan.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {

		String adjacencyPath = args[0]; //"/data/demodata/pagerank/adjacency/adjacency.csv";
		String outpath = args[1]; //"/home/cicero/Desktop/out.txt";


		int numIterations = Integer.valueOf(args[2]);
		long numVertices = Integer.valueOf(args[3]);


		final double threshold = 0.005 / numVertices;


		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Tuple2<Long, long[]>> adjacency = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());
		DataSet<Tuple2<Long, long[]>> adjacency2 = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());

		DataSet<Tuple2<Long, Double>> initialRanks = adjacency.map(new VertexInitializer(1.0 / numVertices));


		IterativeDataSet<Tuple2<Long, Double>> iteration = initialRanks.iterate(numIterations);

		DataSet<Tuple2<Long, Double>> newRanks = iteration
				.join(adjacency2, JoinHint.REPARTITION_HASH_SECOND).where(0).equalTo(0).with(new RankDistributor(0.85, numVertices))
				.groupBy(0)
				.reduceGroup(new Adder());

		DataSet<Integer> tc = iteration.join(newRanks).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Double>, Tuple2<Long, Double>, Integer>() {
			@Override
			public void join(Tuple2<Long, Double> longDoubleTuple2, Tuple2<Long, Double> longDoubleTuple22, Collector<Integer> collector) throws Exception {
				double delta = Math.abs(longDoubleTuple2.f1 - longDoubleTuple22.f1);
				if(delta > threshold) {
					collector.collect(1);
				}
			}
		});

		iteration.closeWith(newRanks, tc).writeAsCsv(outpath+"_fastbulk", WriteMode.OVERWRITE);

//		System.out.println(env.getExecutionPlan());

		env.execute("Page Rank Optimized");
	}