Java Code Examples for org.apache.flink.graph.Graph#fromTupleDataSet()

The following examples show how to use org.apache.flink.graph.Graph#fromTupleDataSet() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: MusicProfiles.java From Flink-CEPplus with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		/**
		 * Read the user-song-play triplets.
		 */
		DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);

		/**
		 * Read the mismatches dataset and extract the songIDs
		 */
		DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());

		/**
		 * Filter out the mismatches from the triplets dataset
		 */
		DataSet<Tuple3<String, String, Integer>> validTriplets = triplets
				.coGroup(mismatches).where(1).equalTo(0)
				.with(new FilterOutMismatches());

		/**
		 * Create a user -> song weighted bipartite graph where the edge weights
		 * correspond to play counts
		 */
		Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);

		/**
		 * Get the top track (most listened) for each user
		 */
		DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph
				.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
				.filter(new FilterSongNodes());

		if (fileOutput) {
			usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
		} else {
			usersWithTopTrack.print();
		}

		/**
		 * Create a user-user similarity graph, based on common songs, i.e. two
		 * users that listen to the same song are connected. For each song, we
		 * create an edge between each pair of its in-neighbors.
		 */
		DataSet<Edge<String, NullValue>> similarUsers = userSongGraph
				.getEdges()
				// filter out user-song edges that are below the playcount threshold
				.filter(new FilterFunction<Edge<String, Integer>>() {
					public boolean filter(Edge<String, Integer> edge) {
						return (edge.getValue() > playcountThreshold);
					}
				}).groupBy(1)
				.reduceGroup(new CreateSimilarUserEdges()).distinct();

		Graph<String, Long, NullValue> similarUsersGraph = Graph.fromDataSet(similarUsers,
				new MapFunction<String, Long>() {
					public Long map(String value) {
						return 1L;
					}
				}, env).getUndirected();

		/**
		 * Detect user communities using the label propagation library method
		 */
		// Initialize each vertex with a unique numeric label and run the label propagation algorithm
		DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils
				.zipWithUniqueId(similarUsersGraph.getVertexIds())
				.map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
					@Override
					public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
						return new Tuple2<>(tuple2.f1, tuple2.f0);
					}
				});

		DataSet<Vertex<String, Long>> verticesWithCommunity = similarUsersGraph
				.joinWithVertices(idsWithInitialLabels,
						new VertexJoinFunction<Long, Long>() {
							public Long vertexJoin(Long vertexValue, Long inputValue) {
								return inputValue;
							}
						}).run(new LabelPropagation<>(maxIterations));

		if (fileOutput) {
			verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute();
		} else {
			verticesWithCommunity.print();
		}

	}

Example 2

Source File: PregelTranslationTest.java From Flink-CEPplus with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	VertexCentricConfiguration parameters = new VertexCentricConfiguration();

	parameters.addBroadcastSet(BC_SET_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runVertexCentricIteration(new MyCompute(), null,
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) ((SingleInputUdfOperator<?, ?, ?>) resultSet.getNextWorkset()).getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}

Example 3

Source File: SpargelTranslationTest.java From Flink-CEPplus with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcMessaging = env.fromElements(1L);
	DataSet<Long> bcUpdate = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	parameters.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, bcMessaging);
	parameters.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, bcUpdate);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(),
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin = (TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(bcUpdate, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(bcMessaging, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}

Example 4

Source File: SpargelTranslationTest.java From Flink-CEPplus with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdgesWithForkedBroadcastVariable() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	parameters.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, bcVar);
	parameters.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(),
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin = (TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(bcVar, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}

Example 5

Source File: MusicProfiles.java From flink with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		/**
		 * Read the user-song-play triplets.
		 */
		DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);

		/**
		 * Read the mismatches dataset and extract the songIDs
		 */
		DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());

		/**
		 * Filter out the mismatches from the triplets dataset
		 */
		DataSet<Tuple3<String, String, Integer>> validTriplets = triplets
				.coGroup(mismatches).where(1).equalTo(0)
				.with(new FilterOutMismatches());

		/**
		 * Create a user -> song weighted bipartite graph where the edge weights
		 * correspond to play counts
		 */
		Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);

		/**
		 * Get the top track (most listened) for each user
		 */
		DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph
				.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
				.filter(new FilterSongNodes());

		if (fileOutput) {
			usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
		} else {
			usersWithTopTrack.print();
		}

		/**
		 * Create a user-user similarity graph, based on common songs, i.e. two
		 * users that listen to the same song are connected. For each song, we
		 * create an edge between each pair of its in-neighbors.
		 */
		DataSet<Edge<String, NullValue>> similarUsers = userSongGraph
				.getEdges()
				// filter out user-song edges that are below the playcount threshold
				.filter(new FilterFunction<Edge<String, Integer>>() {
					public boolean filter(Edge<String, Integer> edge) {
						return (edge.getValue() > playcountThreshold);
					}
				}).groupBy(1)
				.reduceGroup(new CreateSimilarUserEdges()).distinct();

		Graph<String, Long, NullValue> similarUsersGraph = Graph.fromDataSet(similarUsers,
				new MapFunction<String, Long>() {
					public Long map(String value) {
						return 1L;
					}
				}, env).getUndirected();

		/**
		 * Detect user communities using the label propagation library method
		 */
		// Initialize each vertex with a unique numeric label and run the label propagation algorithm
		DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils
				.zipWithUniqueId(similarUsersGraph.getVertexIds())
				.map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
					@Override
					public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
						return new Tuple2<>(tuple2.f1, tuple2.f0);
					}
				});

		DataSet<Vertex<String, Long>> verticesWithCommunity = similarUsersGraph
				.joinWithVertices(idsWithInitialLabels,
						new VertexJoinFunction<Long, Long>() {
							public Long vertexJoin(Long vertexValue, Long inputValue) {
								return inputValue;
							}
						}).run(new LabelPropagation<>(maxIterations));

		if (fileOutput) {
			verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute();
		} else {
			verticesWithCommunity.print();
		}

	}

Example 6

Source File: PregelTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	VertexCentricConfiguration parameters = new VertexCentricConfiguration();

	parameters.addBroadcastSet(BC_SET_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runVertexCentricIteration(new MyCompute(), null,
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) ((SingleInputUdfOperator<?, ?, ?>) resultSet.getNextWorkset()).getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}

Example 7

Source File: SpargelTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcMessaging = env.fromElements(1L);
	DataSet<Long> bcUpdate = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	parameters.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, bcMessaging);
	parameters.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, bcUpdate);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(),
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin = (TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(bcUpdate, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(bcMessaging, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}

Example 8

Source File: SpargelTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdgesWithForkedBroadcastVariable() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	parameters.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, bcVar);
	parameters.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(),
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin = (TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(bcVar, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}

Example 9

Source File: MusicProfiles.java From flink with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		/**
		 * Read the user-song-play triplets.
		 */
		DataSet<Tuple3<String, String, Integer>> triplets = getUserSongTripletsData(env);

		/**
		 * Read the mismatches dataset and extract the songIDs
		 */
		DataSet<Tuple1<String>> mismatches = getMismatchesData(env).map(new ExtractMismatchSongIds());

		/**
		 * Filter out the mismatches from the triplets dataset
		 */
		DataSet<Tuple3<String, String, Integer>> validTriplets = triplets
				.coGroup(mismatches).where(1).equalTo(0)
				.with(new FilterOutMismatches());

		/**
		 * Create a user -> song weighted bipartite graph where the edge weights
		 * correspond to play counts
		 */
		Graph<String, NullValue, Integer> userSongGraph = Graph.fromTupleDataSet(validTriplets, env);

		/**
		 * Get the top track (most listened) for each user
		 */
		DataSet<Tuple2<String, String>> usersWithTopTrack = userSongGraph
				.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
				.filter(new FilterSongNodes());

		if (fileOutput) {
			usersWithTopTrack.writeAsCsv(topTracksOutputPath, "\n", "\t");
		} else {
			usersWithTopTrack.print();
		}

		/**
		 * Create a user-user similarity graph, based on common songs, i.e. two
		 * users that listen to the same song are connected. For each song, we
		 * create an edge between each pair of its in-neighbors.
		 */
		DataSet<Edge<String, NullValue>> similarUsers = userSongGraph
				.getEdges()
				// filter out user-song edges that are below the playcount threshold
				.filter(new FilterFunction<Edge<String, Integer>>() {
					public boolean filter(Edge<String, Integer> edge) {
						return (edge.getValue() > playcountThreshold);
					}
				}).groupBy(1)
				.reduceGroup(new CreateSimilarUserEdges()).distinct();

		Graph<String, Long, NullValue> similarUsersGraph = Graph.fromDataSet(similarUsers,
				new MapFunction<String, Long>() {
					public Long map(String value) {
						return 1L;
					}
				}, env).getUndirected();

		/**
		 * Detect user communities using the label propagation library method
		 */
		// Initialize each vertex with a unique numeric label and run the label propagation algorithm
		DataSet<Tuple2<String, Long>> idsWithInitialLabels = DataSetUtils
				.zipWithUniqueId(similarUsersGraph.getVertexIds())
				.map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
					@Override
					public Tuple2<String, Long> map(Tuple2<Long, String> tuple2) throws Exception {
						return new Tuple2<>(tuple2.f1, tuple2.f0);
					}
				});

		DataSet<Vertex<String, Long>> verticesWithCommunity = similarUsersGraph
				.joinWithVertices(idsWithInitialLabels,
						new VertexJoinFunction<Long, Long>() {
							public Long vertexJoin(Long vertexValue, Long inputValue) {
								return inputValue;
							}
						}).run(new LabelPropagation<>(maxIterations));

		if (fileOutput) {
			verticesWithCommunity.writeAsCsv(communitiesOutputPath, "\n", "\t");

			// since file sinks are lazy, we trigger the execution explicitly
			env.execute();
		} else {
			verticesWithCommunity.print();
		}

	}

Example 10

Source File: PregelTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	VertexCentricConfiguration parameters = new VertexCentricConfiguration();

	parameters.addBroadcastSet(BC_SET_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runVertexCentricIteration(new MyCompute(), null,
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) ((SingleInputUdfOperator<?, ?, ?>) resultSet.getNextWorkset()).getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}

Example 11

Source File: SpargelTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcMessaging = env.fromElements(1L);
	DataSet<Long> bcUpdate = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	parameters.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, bcMessaging);
	parameters.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, bcUpdate);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(),
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin = (TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(bcUpdate, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(bcMessaging, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}

Example 12

Source File: SpargelTranslationTest.java From flink with Apache License 2.0

4 votes

@Test
public void testTranslationPlainEdgesWithForkedBroadcastVariable() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<String, Double>> result;

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> initialVertices = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edges = env.fromElements(new Tuple2<>("a", "c"));

	Graph<String, Double, NullValue> graph = Graph.fromTupleDataSet(initialVertices,
		edges.map(new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(
				Tuple2<String, String> edge) {
				return new Tuple3<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		}), env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();

	parameters.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, bcVar);
	parameters.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, bcVar);
	parameters.setName(ITERATION_NAME);
	parameters.setParallelism(ITERATION_parallelism);
	parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	result = graph.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(),
		NUM_ITERATIONS, parameters).getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
	assertArrayEquals(new int[]{0}, resultSet.getKeyPositions());
	assertEquals(ITERATION_parallelism, iteration.getParallelism());
	assertEquals(ITERATION_NAME, iteration.getName());

	assertEquals(AGGREGATOR_NAME, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin = (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin = (TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(bcVar, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(bcVar, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}

Example 13

Source File: BatchJob.java From Mastering-Apache-Flink with MIT License

3 votes

public static void main(String[] args) throws Exception {
	// set up the batch execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Create graph by reading from CSV files
	DataSet<Tuple2<String, Double>> airportVertices = env
			.readCsvFile("D://work//Mastering Flink//Chapter 7//data//nodes.csv").types(String.class, Double.class);

	DataSet<Tuple3<String, String, Double>> airportEdges = env
			.readCsvFile("D://work//Mastering Flink//Chapter 7//data//edges.csv")
			.types(String.class, String.class, Double.class);

	Graph<String, Double, Double> graph = Graph.fromTupleDataSet(airportVertices, airportEdges, env);

	// Find out no. of airports and routes
	System.out.println("No. of Routes in Graph:" + graph.numberOfEdges());
	System.out.println("No. of Airports in Graph:" + graph.numberOfVertices());

	// define the maximum number of iterations
	int maxIterations = 10;

	// Execute the vertex-centric iteration
	Graph<String, Double, Double> result = graph.runVertexCentricIteration(new SSSPComputeFunction(),
			new SSSPCombiner(), maxIterations);

	// Extract the vertices as the result
	DataSet<Vertex<String, Double>> singleSourceShortestPaths = result.getVertices();
	
	singleSourceShortestPaths.print();
	
	

}

Example 14

Source File: GraphLoader.java From OSTMap with Apache License 2.0

3 votes

/**
 * builds a user graph from tweets in json format
 * an user represents an vertex
 * an edge A->B is created if an user A mentions another user B
 *
 * @param path path to folder containing json files to load as user graph
 * @return
 */
public Graph<String, UserNodeValues, UserEdgeValues> getUserGraphFromFiles(String path, ExecutionEnvironment env) {


    DataSet<String> rawData = readRawData(path, env);
    DataSet<JSONObject> jsonData = getJsonData(rawData);


    DataSet<Tuple2<String, UserNodeValues>> vertices = getUserNodes(jsonData);
    DataSet<Tuple3<String, String, UserEdgeValues>> edges = getUserEdges(jsonData);

    Graph<String, UserNodeValues, UserEdgeValues> g = Graph.fromTupleDataSet(vertices, edges, env);

    return g;
}