Java Code Examples for org.apache.flink.graph.Graph#fromTupleDataSet()

The following examples show how to use org.apache.flink.graph.Graph#fromTupleDataSet(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: MusicProfiles.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the example. Builds a user-song graph from the play-count
 * triplets, outputs the top track of each user, then derives a user-user
 * similarity graph and labels its vertices with communities through label
 * propagation.
 *
 * @param args command-line arguments, handed to {@code parseParameters}
 * @throws Exception propagated from the dataset operations and the job execution
 */
public static void main(String[] args) throws Exception {

	// Bail out when parseParameters returns false.
	boolean parsed = parseParameters(args);
	if (!parsed) {
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Read the user-song-play triplets.
	DataSet<Tuple3<String, String, Integer>> playTriplets = getUserSongTripletsData(env);

	// Read the mismatches dataset and extract the song IDs.
	DataSet<Tuple1<String>> mismatchedSongIds = getMismatchesData(env).map(new ExtractMismatchSongIds());

	// Filter the mismatches out of the triplets: co-group triplet field 1
	// with mismatch field 0.
	DataSet<Tuple3<String, String, Integer>> cleanTriplets = playTriplets
			.coGroup(mismatchedSongIds)
			.where(1)
			.equalTo(0)
			.with(new FilterOutMismatches());

	// User -> song weighted bipartite graph; the edge weights are the play counts.
	Graph<String, NullValue, Integer> playGraph = Graph.fromTupleDataSet(cleanTriplets, env);

	// Top (most listened) track of each user.
	DataSet<Tuple2<String, String>> topTrackPerUser = playGraph
			.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
			.filter(new FilterSongNodes());

	if (!fileOutput) {
		topTrackPerUser.print();
	} else {
		topTrackPerUser.writeAsCsv(topTracksOutputPath, "\n", "\t");
	}

	// User-user similarity graph based on common songs: two users that listen
	// to the same song are connected. For each song, an edge is created
	// between each pair of its in-neighbors.

	// Keeps only user-song edges whose play count exceeds the playcount threshold.
	FilterFunction<Edge<String, Integer>> exceedsThreshold = new FilterFunction<Edge<String, Integer>>() {
		public boolean filter(Edge<String, Integer> userSongEdge) {
			return playcountThreshold < userSongEdge.getValue();
		}
	};

	DataSet<Edge<String, NullValue>> similarityEdges = playGraph
			.getEdges()
			.filter(exceedsThreshold)
			.groupBy(1)
			.reduceGroup(new CreateSimilarUserEdges())
			.distinct();

	// Every vertex of the similarity graph starts with the value 1.
	MapFunction<String, Long> constantVertexValue = new MapFunction<String, Long>() {
		public Long map(String vertexId) {
			return 1L;
		}
	};

	Graph<String, Long, NullValue> similarityGraph = Graph
			.fromDataSet(similarityEdges, constantVertexValue, env)
			.getUndirected();

	// Detect user communities using the label propagation library method:
	// give each vertex a unique numeric label, then run the algorithm.
	DataSet<Tuple2<Long, String>> labelledIds = DataSetUtils.zipWithUniqueId(similarityGraph.getVertexIds());

	DataSet<Tuple2<String, Long>> initialLabels = labelledIds
			.map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
				@Override
				public Tuple2<String, Long> map(Tuple2<Long, String> labelAndId) throws Exception {
					// swap (label, id) into (id, label)
					return new Tuple2<>(labelAndId.f1, labelAndId.f0);
				}
			});

	// The joined-in label replaces the current vertex value.
	VertexJoinFunction<Long, Long> takeJoinedLabel = new VertexJoinFunction<Long, Long>() {
		public Long vertexJoin(Long currentValue, Long joinedLabel) {
			return joinedLabel;
		}
	};

	Graph<String, Long, NullValue> labelledGraph = similarityGraph.joinWithVertices(initialLabels, takeJoinedLabel);

	DataSet<Vertex<String, Long>> communities = labelledGraph.run(new LabelPropagation<>(maxIterations));

	if (!fileOutput) {
		communities.print();
	} else {
		communities.writeAsCsv(communitiesOutputPath, "\n", "\t");

		// since file sinks are lazy, we trigger the execution explicitly
		env.execute();
	}

}
 
Example 2
Source File: PregelTranslationTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a graph from tuple datasets whose edges carry {@code NullValue},
 * runs a vertex-centric iteration configured with a name, a parallelism, an
 * aggregator and a broadcast set, and checks that the resulting
 * {@code DeltaIterationResultSet} reflects that configuration.
 */
@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> broadcastData = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	VertexCentricConfiguration config = new VertexCentricConfiguration();

	config.addBroadcastSet(BC_SET_NAME, broadcastData);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runVertexCentricIteration(new MyCompute(), null, NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// the operator under inspection is the input of the next-workset operator
	SingleInputUdfOperator<?, ?, ?> nextWorkset =
		(SingleInputUdfOperator<?, ?, ?>) deltaResult.getNextWorkset();
	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) nextWorkset.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(broadcastData, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}
 
Example 3
Source File: SpargelTranslationTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a graph from tuple datasets whose edges carry {@code NullValue},
 * runs a scatter-gather iteration configured with a name, a parallelism, an
 * aggregator and separate broadcast sets for the scatter and gather
 * functions, and checks that the resulting {@code DeltaIterationResultSet}
 * reflects that configuration.
 */
@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> scatterBroadcast = env.fromElements(1L);
	DataSet<Long> gatherBroadcast = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	ScatterGatherConfiguration config = new ScatterGatherConfiguration();

	config.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, scatterBroadcast);
	config.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, gatherBroadcast);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runScatterGatherIteration(
			new MessageFunctionNoEdgeValue(), new UpdateFunction(), NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) deltaResult.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(gatherBroadcast, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(scatterBroadcast, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}
 
Example 4
Source File: SpargelTranslationTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Same scenario as the plain-edges scatter-gather translation test, except
 * that one single dataset is registered as the broadcast set of both the
 * scatter function and the gather function. Checks that the resulting
 * {@code DeltaIterationResultSet} reflects the configuration and that the
 * shared dataset is forwarded to both operators.
 */
@Test
public void testTranslationPlainEdgesWithForkedBroadcastVariable() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> sharedBroadcast = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	ScatterGatherConfiguration config = new ScatterGatherConfiguration();

	// the very same dataset feeds both the scatter and the gather function
	config.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, sharedBroadcast);
	config.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, sharedBroadcast);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runScatterGatherIteration(
			new MessageFunctionNoEdgeValue(), new UpdateFunction(), NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) deltaResult.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(sharedBroadcast, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(sharedBroadcast, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}
 
Example 5
Source File: MusicProfiles.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the example. Builds a user-song graph from the play-count
 * triplets, outputs the top track of each user, then derives a user-user
 * similarity graph and labels its vertices with communities through label
 * propagation.
 *
 * @param args command-line arguments, handed to {@code parseParameters}
 * @throws Exception propagated from the dataset operations and the job execution
 */
public static void main(String[] args) throws Exception {

	// Bail out when parseParameters returns false.
	boolean parsed = parseParameters(args);
	if (!parsed) {
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Read the user-song-play triplets.
	DataSet<Tuple3<String, String, Integer>> playTriplets = getUserSongTripletsData(env);

	// Read the mismatches dataset and extract the song IDs.
	DataSet<Tuple1<String>> mismatchedSongIds = getMismatchesData(env).map(new ExtractMismatchSongIds());

	// Filter the mismatches out of the triplets: co-group triplet field 1
	// with mismatch field 0.
	DataSet<Tuple3<String, String, Integer>> cleanTriplets = playTriplets
			.coGroup(mismatchedSongIds)
			.where(1)
			.equalTo(0)
			.with(new FilterOutMismatches());

	// User -> song weighted bipartite graph; the edge weights are the play counts.
	Graph<String, NullValue, Integer> playGraph = Graph.fromTupleDataSet(cleanTriplets, env);

	// Top (most listened) track of each user.
	DataSet<Tuple2<String, String>> topTrackPerUser = playGraph
			.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
			.filter(new FilterSongNodes());

	if (!fileOutput) {
		topTrackPerUser.print();
	} else {
		topTrackPerUser.writeAsCsv(topTracksOutputPath, "\n", "\t");
	}

	// User-user similarity graph based on common songs: two users that listen
	// to the same song are connected. For each song, an edge is created
	// between each pair of its in-neighbors.

	// Keeps only user-song edges whose play count exceeds the playcount threshold.
	FilterFunction<Edge<String, Integer>> exceedsThreshold = new FilterFunction<Edge<String, Integer>>() {
		public boolean filter(Edge<String, Integer> userSongEdge) {
			return playcountThreshold < userSongEdge.getValue();
		}
	};

	DataSet<Edge<String, NullValue>> similarityEdges = playGraph
			.getEdges()
			.filter(exceedsThreshold)
			.groupBy(1)
			.reduceGroup(new CreateSimilarUserEdges())
			.distinct();

	// Every vertex of the similarity graph starts with the value 1.
	MapFunction<String, Long> constantVertexValue = new MapFunction<String, Long>() {
		public Long map(String vertexId) {
			return 1L;
		}
	};

	Graph<String, Long, NullValue> similarityGraph = Graph
			.fromDataSet(similarityEdges, constantVertexValue, env)
			.getUndirected();

	// Detect user communities using the label propagation library method:
	// give each vertex a unique numeric label, then run the algorithm.
	DataSet<Tuple2<Long, String>> labelledIds = DataSetUtils.zipWithUniqueId(similarityGraph.getVertexIds());

	DataSet<Tuple2<String, Long>> initialLabels = labelledIds
			.map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
				@Override
				public Tuple2<String, Long> map(Tuple2<Long, String> labelAndId) throws Exception {
					// swap (label, id) into (id, label)
					return new Tuple2<>(labelAndId.f1, labelAndId.f0);
				}
			});

	// The joined-in label replaces the current vertex value.
	VertexJoinFunction<Long, Long> takeJoinedLabel = new VertexJoinFunction<Long, Long>() {
		public Long vertexJoin(Long currentValue, Long joinedLabel) {
			return joinedLabel;
		}
	};

	Graph<String, Long, NullValue> labelledGraph = similarityGraph.joinWithVertices(initialLabels, takeJoinedLabel);

	DataSet<Vertex<String, Long>> communities = labelledGraph.run(new LabelPropagation<>(maxIterations));

	if (!fileOutput) {
		communities.print();
	} else {
		communities.writeAsCsv(communitiesOutputPath, "\n", "\t");

		// since file sinks are lazy, we trigger the execution explicitly
		env.execute();
	}

}
 
Example 6
Source File: PregelTranslationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a graph from tuple datasets whose edges carry {@code NullValue},
 * runs a vertex-centric iteration configured with a name, a parallelism, an
 * aggregator and a broadcast set, and checks that the resulting
 * {@code DeltaIterationResultSet} reflects that configuration.
 */
@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> broadcastData = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	VertexCentricConfiguration config = new VertexCentricConfiguration();

	config.addBroadcastSet(BC_SET_NAME, broadcastData);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runVertexCentricIteration(new MyCompute(), null, NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// the operator under inspection is the input of the next-workset operator
	SingleInputUdfOperator<?, ?, ?> nextWorkset =
		(SingleInputUdfOperator<?, ?, ?>) deltaResult.getNextWorkset();
	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) nextWorkset.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(broadcastData, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}
 
Example 7
Source File: SpargelTranslationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a graph from tuple datasets whose edges carry {@code NullValue},
 * runs a scatter-gather iteration configured with a name, a parallelism, an
 * aggregator and separate broadcast sets for the scatter and gather
 * functions, and checks that the resulting {@code DeltaIterationResultSet}
 * reflects that configuration.
 */
@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> scatterBroadcast = env.fromElements(1L);
	DataSet<Long> gatherBroadcast = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	ScatterGatherConfiguration config = new ScatterGatherConfiguration();

	config.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, scatterBroadcast);
	config.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, gatherBroadcast);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runScatterGatherIteration(
			new MessageFunctionNoEdgeValue(), new UpdateFunction(), NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) deltaResult.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(gatherBroadcast, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(scatterBroadcast, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}
 
Example 8
Source File: SpargelTranslationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Same scenario as the plain-edges scatter-gather translation test, except
 * that one single dataset is registered as the broadcast set of both the
 * scatter function and the gather function. Checks that the resulting
 * {@code DeltaIterationResultSet} reflects the configuration and that the
 * shared dataset is forwarded to both operators.
 */
@Test
public void testTranslationPlainEdgesWithForkedBroadcastVariable() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> sharedBroadcast = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	ScatterGatherConfiguration config = new ScatterGatherConfiguration();

	// the very same dataset feeds both the scatter and the gather function
	config.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, sharedBroadcast);
	config.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, sharedBroadcast);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runScatterGatherIteration(
			new MessageFunctionNoEdgeValue(), new UpdateFunction(), NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) deltaResult.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(sharedBroadcast, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(sharedBroadcast, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}
 
Example 9
Source File: MusicProfiles.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Entry point of the example. Builds a user-song graph from the play-count
 * triplets, outputs the top track of each user, then derives a user-user
 * similarity graph and labels its vertices with communities through label
 * propagation.
 *
 * @param args command-line arguments, handed to {@code parseParameters}
 * @throws Exception propagated from the dataset operations and the job execution
 */
public static void main(String[] args) throws Exception {

	// Bail out when parseParameters returns false.
	boolean parsed = parseParameters(args);
	if (!parsed) {
		return;
	}

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Read the user-song-play triplets.
	DataSet<Tuple3<String, String, Integer>> playTriplets = getUserSongTripletsData(env);

	// Read the mismatches dataset and extract the song IDs.
	DataSet<Tuple1<String>> mismatchedSongIds = getMismatchesData(env).map(new ExtractMismatchSongIds());

	// Filter the mismatches out of the triplets: co-group triplet field 1
	// with mismatch field 0.
	DataSet<Tuple3<String, String, Integer>> cleanTriplets = playTriplets
			.coGroup(mismatchedSongIds)
			.where(1)
			.equalTo(0)
			.with(new FilterOutMismatches());

	// User -> song weighted bipartite graph; the edge weights are the play counts.
	Graph<String, NullValue, Integer> playGraph = Graph.fromTupleDataSet(cleanTriplets, env);

	// Top (most listened) track of each user.
	DataSet<Tuple2<String, String>> topTrackPerUser = playGraph
			.groupReduceOnEdges(new GetTopSongPerUser(), EdgeDirection.OUT)
			.filter(new FilterSongNodes());

	if (!fileOutput) {
		topTrackPerUser.print();
	} else {
		topTrackPerUser.writeAsCsv(topTracksOutputPath, "\n", "\t");
	}

	// User-user similarity graph based on common songs: two users that listen
	// to the same song are connected. For each song, an edge is created
	// between each pair of its in-neighbors.

	// Keeps only user-song edges whose play count exceeds the playcount threshold.
	FilterFunction<Edge<String, Integer>> exceedsThreshold = new FilterFunction<Edge<String, Integer>>() {
		public boolean filter(Edge<String, Integer> userSongEdge) {
			return playcountThreshold < userSongEdge.getValue();
		}
	};

	DataSet<Edge<String, NullValue>> similarityEdges = playGraph
			.getEdges()
			.filter(exceedsThreshold)
			.groupBy(1)
			.reduceGroup(new CreateSimilarUserEdges())
			.distinct();

	// Every vertex of the similarity graph starts with the value 1.
	MapFunction<String, Long> constantVertexValue = new MapFunction<String, Long>() {
		public Long map(String vertexId) {
			return 1L;
		}
	};

	Graph<String, Long, NullValue> similarityGraph = Graph
			.fromDataSet(similarityEdges, constantVertexValue, env)
			.getUndirected();

	// Detect user communities using the label propagation library method:
	// give each vertex a unique numeric label, then run the algorithm.
	DataSet<Tuple2<Long, String>> labelledIds = DataSetUtils.zipWithUniqueId(similarityGraph.getVertexIds());

	DataSet<Tuple2<String, Long>> initialLabels = labelledIds
			.map(new MapFunction<Tuple2<Long, String>, Tuple2<String, Long>>() {
				@Override
				public Tuple2<String, Long> map(Tuple2<Long, String> labelAndId) throws Exception {
					// swap (label, id) into (id, label)
					return new Tuple2<>(labelAndId.f1, labelAndId.f0);
				}
			});

	// The joined-in label replaces the current vertex value.
	VertexJoinFunction<Long, Long> takeJoinedLabel = new VertexJoinFunction<Long, Long>() {
		public Long vertexJoin(Long currentValue, Long joinedLabel) {
			return joinedLabel;
		}
	};

	Graph<String, Long, NullValue> labelledGraph = similarityGraph.joinWithVertices(initialLabels, takeJoinedLabel);

	DataSet<Vertex<String, Long>> communities = labelledGraph.run(new LabelPropagation<>(maxIterations));

	if (!fileOutput) {
		communities.print();
	} else {
		communities.writeAsCsv(communitiesOutputPath, "\n", "\t");

		// since file sinks are lazy, we trigger the execution explicitly
		env.execute();
	}

}
 
Example 10
Source File: PregelTranslationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a graph from tuple datasets whose edges carry {@code NullValue},
 * runs a vertex-centric iteration configured with a name, a parallelism, an
 * aggregator and a broadcast set, and checks that the resulting
 * {@code DeltaIterationResultSet} reflects that configuration.
 */
@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> broadcastData = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	VertexCentricConfiguration config = new VertexCentricConfiguration();

	config.addBroadcastSet(BC_SET_NAME, broadcastData);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runVertexCentricIteration(new MyCompute(), null, NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// the operator under inspection is the input of the next-workset operator
	SingleInputUdfOperator<?, ?, ?> nextWorkset =
		(SingleInputUdfOperator<?, ?, ?>) deltaResult.getNextWorkset();
	TwoInputUdfOperator<?, ?, ?, ?> computationCoGroup =
		(TwoInputUdfOperator<?, ?, ?, ?>) nextWorkset.getInput();

	// validate that the broadcast sets are forwarded
	assertEquals(broadcastData, computationCoGroup.getBroadcastSets().get(BC_SET_NAME));
}
 
Example 11
Source File: SpargelTranslationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a graph from tuple datasets whose edges carry {@code NullValue},
 * runs a scatter-gather iteration configured with a name, a parallelism, an
 * aggregator and separate broadcast sets for the scatter and gather
 * functions, and checks that the resulting {@code DeltaIterationResultSet}
 * reflects that configuration.
 */
@Test
public void testTranslationPlainEdges() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> scatterBroadcast = env.fromElements(1L);
	DataSet<Long> gatherBroadcast = env.fromElements(1L);

	// ------------ construct the test program ------------------

	DataSet<Tuple2<String, Double>> vertexTuples = env.fromElements(new Tuple2<>("abc", 3.44));

	DataSet<Tuple2<String, String>> edgePairs = env.fromElements(new Tuple2<>("a", "c"));

	// turns each (source, target) pair into a (source, target, NullValue) triple
	MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> toNullValuedEdge =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, edgePairs.map(toNullValuedEdge), env);

	ScatterGatherConfiguration config = new ScatterGatherConfiguration();

	config.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, scatterBroadcast);
	config.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, gatherBroadcast);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	DataSet<Vertex<String, Double>> result = graph
		.runScatterGatherIteration(
			new MessageFunctionNoEdgeValue(), new UpdateFunction(), NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// check the basic iteration properties
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	assertEquals(
		AGGREGATOR_NAME,
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

	// validate that the semantic properties are set as they should
	TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) deltaResult.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	TwoInputUdfOperator<?, ?, ?, ?> edgesJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// validate that the broadcast sets are forwarded
	assertEquals(gatherBroadcast, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(scatterBroadcast, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}
 
Example 12
Source File: SpargelTranslationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a scatter-gather iteration over a graph whose edges carry {@code NullValue},
 * registering one and the same data set as broadcast set for both the scatter and the
 * gather function, and then checks the properties of the resulting delta iteration.
 */
@Test
public void testTranslationPlainEdgesWithForkedBroadcastVariable() {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

	// a single data set that is handed to both the scatter and the gather side
	final DataSet<Long> sharedBroadcast = environment.fromElements(1L);

	// ------------ construct the test program ------------------

	final DataSet<Tuple2<String, Double>> vertexTuples = environment.fromElements(new Tuple2<>("abc", 3.44));
	final DataSet<Tuple2<String, String>> plainEdges = environment.fromElements(new Tuple2<>("a", "c"));

	// turns a (source, target) pair into an edge triple carrying NullValue
	final MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>> attachNullValue =
		new MapFunction<Tuple2<String, String>, Tuple3<String, String, NullValue>>() {

			@Override
			public Tuple3<String, String, NullValue> map(Tuple2<String, String> pair) {
				return new Tuple3<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		};

	final Graph<String, Double, NullValue> graph =
		Graph.fromTupleDataSet(vertexTuples, plainEdges.map(attachNullValue), environment);

	final ScatterGatherConfiguration config = new ScatterGatherConfiguration();
	config.addBroadcastSetForScatterFunction(BC_SET_MESSAGES_NAME, sharedBroadcast);
	config.addBroadcastSetForGatherFunction(BC_SET_UPDATES_NAME, sharedBroadcast);
	config.setName(ITERATION_NAME);
	config.setParallelism(ITERATION_parallelism);
	config.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());

	final DataSet<Vertex<String, Double>> result = graph
		.runScatterGatherIteration(new MessageFunctionNoEdgeValue(), new UpdateFunction(), NUM_ITERATIONS, config)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	// ------------- validate the java program ----------------

	assertTrue(result instanceof DeltaIterationResultSet);

	final DeltaIterationResultSet<?, ?> deltaResult = (DeltaIterationResultSet<?, ?>) result;
	final DeltaIteration<?, ?> head = deltaResult.getIterationHead();

	// basic iteration properties: iteration count, key position, parallelism, name
	assertEquals(NUM_ITERATIONS, deltaResult.getMaxIterations());
	assertArrayEquals(new int[]{0}, deltaResult.getKeyPositions());
	assertEquals(ITERATION_parallelism, head.getParallelism());
	assertEquals(ITERATION_NAME, head.getName());

	// the first registered aggregator must be the one configured above
	final String firstAggregatorName =
		head.getAggregators().getAllRegisteredAggregators().iterator().next().getName();
	assertEquals(AGGREGATOR_NAME, firstAggregatorName);

	// semantic properties: field 0 of either input is forwarded to field 0
	final TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) deltaResult.getNextWorkset();
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
	assertTrue(solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

	final TwoInputUdfOperator<?, ?, ?, ?> edgesJoin =
		(TwoInputUdfOperator<?, ?, ?, ?>) solutionSetJoin.getInput1();

	// the shared broadcast set must be attached to both operators under its respective name
	assertEquals(sharedBroadcast, solutionSetJoin.getBroadcastSets().get(BC_SET_UPDATES_NAME));
	assertEquals(sharedBroadcast, edgesJoin.getBroadcastSets().get(BC_SET_MESSAGES_NAME));
}
 
Example 13
Source File: BatchJob.java    From Mastering-Apache-Flink with MIT License 3 votes vote down vote up
/**
 * Loads a graph from a vertex CSV file and an edge CSV file, prints the number of
 * edges and vertices, runs a vertex-centric iteration with {@code SSSPComputeFunction}
 * and {@code SSSPCombiner} for at most 10 iterations, and prints the resulting vertices.
 *
 * <p>Vertices are read as {@code (String, Double)} rows and edges as
 * {@code (String, String, Double)} rows.
 *
 * @param args optional input overrides: {@code args[0]} is the path of the vertex CSV
 *             file and {@code args[1]} the path of the edge CSV file; when an argument
 *             is missing, the original hard-coded location is used for that file
 * @throws Exception if reading the input or executing the job fails
 */
public static void main(String[] args) throws Exception {
	// Locations used when no override is passed on the command line
	final String defaultVerticesPath = "D://work//Mastering Flink//Chapter 7//data//nodes.csv";
	final String defaultEdgesPath = "D://work//Mastering Flink//Chapter 7//data//edges.csv";

	final String verticesPath = (args != null && args.length > 0) ? args[0] : defaultVerticesPath;
	final String edgesPath = (args != null && args.length > 1) ? args[1] : defaultEdgesPath;

	// set up the batch execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Create graph by reading from CSV files
	DataSet<Tuple2<String, Double>> airportVertices = env
			.readCsvFile(verticesPath).types(String.class, Double.class);

	DataSet<Tuple3<String, String, Double>> airportEdges = env
			.readCsvFile(edgesPath)
			.types(String.class, String.class, Double.class);

	Graph<String, Double, Double> graph = Graph.fromTupleDataSet(airportVertices, airportEdges, env);

	// Find out no. of airports and routes
	System.out.println("No. of Routes in Graph:" + graph.numberOfEdges());
	System.out.println("No. of Airports in Graph:" + graph.numberOfVertices());

	// define the maximum number of iterations
	int maxIterations = 10;

	// Execute the vertex-centric iteration
	Graph<String, Double, Double> result = graph.runVertexCentricIteration(new SSSPComputeFunction(),
			new SSSPCombiner(), maxIterations);

	// Extract the vertices as the result
	// NOTE(review): presumably single-source shortest path values, judging by the names — confirm
	DataSet<Vertex<String, Double>> singleSourceShortestPaths = result.getVertices();

	singleSourceShortestPaths.print();
}
 
Example 14
Source File: GraphLoader.java    From OSTMap with Apache License 2.0 3 votes vote down vote up
/**
 * Builds a user graph from tweets in JSON format.
 *
 * <p>Each user is represented by a vertex; an edge {@code A -> B} is created if
 * user A mentions user B.
 *
 * @param path path to the folder containing the JSON files to load as user graph
 * @param env  execution environment handed to {@code readRawData} and
 *             {@code Graph.fromTupleDataSet}
 * @return the graph assembled from the vertices produced by {@code getUserNodes}
 *         and the edges produced by {@code getUserEdges}
 */
public Graph<String, UserNodeValues, UserEdgeValues> getUserGraphFromFiles(String path, ExecutionEnvironment env) {
    // raw input -> JSON objects
    final DataSet<JSONObject> tweets = getJsonData(readRawData(path, env));

    // both vertices and edges are derived from the same JSON data set
    final DataSet<Tuple2<String, UserNodeValues>> userVertices = getUserNodes(tweets);
    final DataSet<Tuple3<String, String, UserEdgeValues>> mentionEdges = getUserEdges(tweets);

    return Graph.fromTupleDataSet(userVertices, mentionEdges, env);
}