Java Code Examples for org.apache.flink.api.java.DataSet#map()

The following examples show how to use org.apache.flink.api.java.DataSet#map(). Each example is taken from an open-source project; the source file and license are noted above each snippet.
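Before the project examples, here is a minimal, self-contained sketch of the call (illustrative values and class name, not taken from any of the projects below). map() applies a user-defined MapFunction to each element of a DataSet and emits exactly one output element per input element:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class MapSketch {
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// a small in-memory data set
		DataSet<Integer> numbers = env.fromElements(1, 2, 3);

		// transform each element one-to-one
		DataSet<String> labels = numbers.map(new MapFunction<Integer, String>() {
			@Override
			public String map(Integer value) {
				return "value-" + value;
			}
		});

		labels.print(); // value-1, value-2, value-3
	}
}
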
Example 1
Source File: ClientTest.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
	if (args.length < 2) {
		System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>");
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0])
			.fieldDelimiter("\t").types(Long.class, Long.class);

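	// increment the second field of each tuple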
	DataSet<Tuple2<Long, Long>> result = input.map(
			new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
					return new Tuple2<Long, Long>(value.f0, value.f1 + 1);
				}
			});
	result.writeAsCsv(args[1], "\n", "\t");
	env.execute();
}
 
Example 2
Source File: ConnectedComponentsWithRandomisedEdgesITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
	DataSet<String> edgeString = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));

	DataSet<Edge<Long, NullValue>> edges = edgeString.map(new EdgeParser());

	DataSet<Vertex<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

	DataSet<Vertex<Long, Long>> result = graph.run(new ConnectedComponents<>(100));

	result.writeAsCsv(resultPath, "\n", " ");
	env.execute();
}
 
Example 3
Source File: JoinDeadlockITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> longs = env.generateSequence(0, 100000);

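	// wrap each long in a Tuple1 so the values can be used as join keys on field 0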
	DataSet<Tuple1<Long>> longT1 = longs.map(new TupleWrapper());
	DataSet<Tuple1<Long>> longT2 = longT1.project(0);
	DataSet<Tuple1<Long>> longT3 = longs.map(new TupleWrapper());

	longT2.join(longT3).where(0).equalTo(0).projectFirst(0)
			.join(longT1).where(0).equalTo(0).projectFirst(0)
			.writeAsText(resultPath);

	env.execute();
}
 
Example 4
Source File: ExecutionPlanAfterExecutionTest.java    From flink with Apache License 2.0
@Test
public void testCreatePlanAfterGetExecutionPlan() {
	ExecutionEnvironment env = new LocalEnvironment();

	DataSet<Integer> baseSet = env.fromElements(1, 2);

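	// double every element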
	DataSet<Integer> result = baseSet.map(new MapFunction<Integer, Integer>() {
		@Override
		public Integer map(Integer value) throws Exception {
			return value * 2;
		}
	});
	result.output(new DiscardingOutputFormat<Integer>());

	try {
		env.getExecutionPlan();
		env.createProgramPlan();
	} catch (Exception e) {
		e.printStackTrace();
		fail("Cannot run both #getExecutionPlan and #execute. Message: " + e.getMessage());
	}
}
 
Example 5
Source File: LargePlanTest.java    From Flink-CEPplus with Apache License 2.0
private static DataSet<String> analyze(DataSet<String> input, DataSet<String> stats, int branches) {
	for (int i = 0; i < branches; i++) {
		final int ii = i;

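		// make the stats collected so far available to the mapper as a broadcast set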
		if (stats != null) {
			input = input.map(
				new RichMapFunction<String, String>() {
					@Override
					public String map(String value) {
						return value;
					}
				}).withBroadcastSet(stats.map(s -> "(" + s + ").map"), "stats");
		}

		DataSet<String> branch = input
			.map(s -> new Tuple2<>(0, s + ii)).returns(Types.TUPLE(Types.INT, Types.STRING))
			.groupBy(0)
			.minBy(1)
			.map(kv -> kv.f1).returns(Types.STRING);
		if (stats == null) {
			stats = branch;
		} else {
			stats = stats.union(branch);
		}
	}
	return stats.map(s -> "(" + s + ").stats");
}
 
Example 6
Source File: SuccessAfterNetworkBuffersFailureITCase.java    From flink with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {

		env.setParallelism(PARALLELISM);
		env.getConfig().disableSysoutLogging();

		// read vertex and edge data
		DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
				.rebalance();

		DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
				.rebalance()
				.flatMap(new ConnectedComponents.UndirectEdge());

		// assign the initial components (equal to the vertex id)
		DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
				.map(new ConnectedComponents.DuplicateValue<Long>());

		// open a delta iteration
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
				verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

		// apply the step logic: join with the edges, select the minimum neighbor,
		// update if the component of the candidate is smaller
		DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
				.where(0).equalTo(0)
				.with(new ConnectedComponents.NeighborWithComponentIDJoin())

				.groupBy(0).aggregate(Aggregations.MIN, 1)

				.join(iteration.getSolutionSet())
				.where(0).equalTo(0)
				.with(new ConnectedComponents.ComponentIdFilter());

		// close the delta iteration (delta and new workset are identical)
		DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

		result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		env.execute();
	}
 
Example 7
Source File: MapITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testTypeConversionMapperCustomToTuple() throws Exception {
	/*
	 * Test type conversion mapper (Custom -> Tuple)
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<CustomType> ds = CollectionDataSets.getCustomTypeDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> typeConversionMapDs = ds.map(new Mapper3());

	List<Tuple3<Integer, Long, String>> result = typeConversionMapDs.collect();

	String expected = "1,0,Hi\n" +
			"2,1,Hello\n" +
			"2,2,Hello world\n" +
			"3,3,Hello world, how are you?\n" +
			"3,4,I am fine.\n" +
			"3,5,Luke Skywalker\n" +
			"4,6,Comment#1\n" +
			"4,7,Comment#2\n" +
			"4,8,Comment#3\n" +
			"4,9,Comment#4\n" +
			"5,10,Comment#5\n" +
			"5,11,Comment#6\n" +
			"5,12,Comment#7\n" +
			"5,13,Comment#8\n" +
			"5,14,Comment#9\n" +
			"6,15,Comment#10\n" +
			"6,16,Comment#11\n" +
			"6,17,Comment#12\n" +
			"6,18,Comment#13\n" +
			"6,19,Comment#14\n" +
			"6,20,Comment#15\n";

	compareResultAsTuples(result, expected);
}
 
Example 8
Source File: HighParallelismIterationsTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	// read vertex and edge data
	DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
			.rebalance();

	DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
			.rebalance()
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
			.map(new ConnectedComponents.DuplicateValue<>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor,
	// update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
			.where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())

			.groupBy(0).aggregate(Aggregations.MIN, 1)

			.join(iteration.getSolutionSet())
			.where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.output(new DiscardingOutputFormat<>());

	env.execute();
}
 
Example 9
Source File: FpGrowthBatchOp.java    From Alink with Apache License 2.0
private static DataSet<Long>
getMinSupportCnt(DataSet<Long> transactionsCnt,
                 final int minSupportCount, final double minSupportPercent) {
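    // use the absolute count when configured (>= 0); otherwise derive the count from the percentage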
    return transactionsCnt.map(new MapFunction<Long, Long>() {
        @Override
        public Long map(Long value) throws Exception {
            if (minSupportCount >= 0) {
                return (long) minSupportCount;
            } else {
                return (long) (Math.floor(value * minSupportPercent));
            }
        }
    });
}
 
Example 10
Source File: ConnectedComponentsITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 11
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
/**
 * <pre>
 *             +---------Iteration-------+
 *             |                         |
 *    /--map--< >----\                   |
 *   /         |      \         /-------< >---sink
 * src-map     |     join------/         |
 *   \         |      /                  |
 *    \        +-----/-------------------+
 *     \            /
 *      \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(100);

		DataSet<Long> source = env.generateSequence(1, 1000000);

		DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

		DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

		IterativeDataSet<Long> iteration = mapped.iterate(10);
		iteration.closeWith(
				iteration.join(reduced)
						.where(new IdentityKeyExtractor<Long>())
						.equalTo(new IdentityKeyExtractor<Long>())
						.with(new DummyFlatJoinFunction<Long>()))
				.output(new DiscardingOutputFormat<Long>());

		compileNoStats(env.createProgramPlan());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 12
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 13
Source File: CoGroupConnectedComponentsSecondITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {

	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
			.map(new VertexParser());

	DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
			.flatMap(new EdgeParser());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration
			.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new MinIdAndUpdate());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	// emit result
	List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
	result.output(new LocalCollectionOutputFormat<>(resultTuples));

	env.execute();
}
 
Example 14
Source File: RegisterTypeWithKryoSerializerITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests whether the Kryo serializer is forwarded via the ExecutionConfig.
 * @throws Exception
 */
@Test
public void testRegisterTypeWithKryoSerializer() throws Exception {
	int numElements = 10;
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env.registerTypeWithKryoSerializer(TestClass.class, new TestClassSerializer());

	DataSet<Long> input = env.generateSequence(0, numElements - 1);

	DataSet<TestClass> mapped = input.map(new MapFunction<Long, TestClass>() {
		private static final long serialVersionUID = -529116076312998262L;

		@Override
		public TestClass map(Long value) throws Exception {
			return new TestClass(value);
		}
	});

	List<TestClass> expected = new ArrayList<>(numElements);

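	// TestClassSerializer (not shown here) restores every element with the value 42, so a constant list is expected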
	for (int i = 0; i < numElements; i++) {
		expected.add(new TestClass(42));
	}

	compareResultCollections(expected, mapped.collect(), new Comparator<TestClass>() {
		@Override
		public int compare(TestClass o1, TestClass o2) {
			return (int) (o1.getValue() - o2.getValue());
		}
	});
}
 
Example 15
Source File: MapITCase.java    From flink with Apache License 2.0
@Test
public void test() throws Exception {
	/*
	 * Test mapper if UDF returns input object - increment first field of a tuple
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> inputObjMapDs = ds.map(new Mapper7());

	List<Tuple3<Integer, Long, String>> result = inputObjMapDs.collect();

	String expected = "2,1,Hi\n" +
			"3,2,Hello\n" +
			"4,2,Hello world\n" +
			"5,3,Hello world, how are you?\n" +
			"6,3,I am fine.\n" +
			"7,3,Luke Skywalker\n" +
			"8,4,Comment#1\n" +
			"9,4,Comment#2\n" +
			"10,4,Comment#3\n" +
			"11,4,Comment#4\n" +
			"12,5,Comment#5\n" +
			"13,5,Comment#6\n" +
			"14,5,Comment#7\n" +
			"15,5,Comment#8\n" +
			"16,5,Comment#9\n" +
			"17,6,Comment#10\n" +
			"18,6,Comment#11\n" +
			"19,6,Comment#12\n" +
			"20,6,Comment#13\n" +
			"21,6,Comment#14\n" +
			"22,6,Comment#15\n";

	compareResultAsTuples(result, expected);
}
 
Example 16
Source File: PageRank.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);

		final int numPages = params.getInt("numPages", PageRankData.getNumberOfPages());
		final int maxIterations = params.getInt("iterations", 10);

		// set up execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make the parameters available to the web ui
		env.getConfig().setGlobalJobParameters(params);

		// get input data
		DataSet<Long> pagesInput = getPagesDataSet(env, params);
		DataSet<Tuple2<Long, Long>> linksInput = getLinksDataSet(env, params);

		// assign initial rank to pages
		DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.
				map(new RankAssigner((1.0d / numPages)));

		// build adjacency list from link input
		DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
				linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());

		// set iterative data set
		IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(maxIterations);

		DataSet<Tuple2<Long, Double>> newRanks = iteration
				// join pages with outgoing edges and distribute rank
				.join(adjacencyListInput).where(0).equalTo(0).flatMap(new JoinVertexWithEdgesMatch())
				// collect and sum ranks
				.groupBy(0).aggregate(SUM, 1)
				// apply dampening factor
				.map(new Dampener(DAMPENING_FACTOR, numPages));

		DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
				newRanks,
				newRanks.join(iteration).where(0).equalTo(0)
				// termination condition
				.filter(new EpsilonFilter()));

		// emit result
		if (params.has("output")) {
			finalPageRanks.writeAsCsv(params.get("output"), "\n", " ");
			// execute program
			env.execute("Basic Page Rank Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			finalPageRanks.print();
		}
	}
 
Example 17
Source File: EnumTriangles.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// read input data
		DataSet<Edge> edges;
		if (params.has("edges")) {
			edges = env.readCsvFile(params.get("edges"))
					.fieldDelimiter(" ")
					.includeFields(true, true)
					.types(Integer.class, Integer.class)
					.map(new TupleEdgeConverter());
		} else {
			System.out.println("Executing EnumTriangles example with default edges data set.");
			System.out.println("Use --edges to specify file input.");
			edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
		}

		// project edges by vertex id
		DataSet<Edge> edgesById = edges
				.map(new EdgeByIdProjector());

		DataSet<Triad> triangles = edgesById
				// build triads
				.groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder())
				// filter triads
				.join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());

		// emit result
		if (params.has("output")) {
			triangles.writeAsCsv(params.get("output"), "\n", ",");
			// execute program
			env.execute("Basic Triangle Enumeration Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			triangles.print();
		}
	}
 
Example 18
Source File: ConnectedComponentsWithDeferredUpdateITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new UpdateComponentIdMatchNonPreserving());

	DataSet<Tuple2<Long, Long>> delta;
	if (extraMapper) {
		delta = changes.map(
				// ID Mapper
				new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
					private static final long serialVersionUID = -3929364091829757322L;

					@Override
					public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
						return v;
					}
				});
	}
	else {
		delta = changes;
	}

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 