org.apache.flink.api.java.operators.CoGroupOperator Java Examples

The following examples show how to use org.apache.flink.api.java.operators.CoGroupOperator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

5 votes

/**
 * Builds the scatter step for simple vertices (vertices without degree information) as a
 * coGroup of the edges with the iteration workset.
 *
 * <p>The resulting operator is named "Messaging". If a configuration is present, every
 * scatter broadcast variable registered in it is attached to the operator, and the vertex
 * count is additionally broadcast as "number of vertices" when that option is enabled.
 *
 * @param iteration the delta iteration whose workset is co-grouped with the edges
 * @param messageTypeInfo the type information handed to the scatter UDF
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the configured coGroup operator that produces the messages
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
		DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

	// co-group the edges with the current workset and name the resulting operator
	CoGroupOperator<?, ?, Tuple2<K, Message>> scatterOp = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf)
			.name("Messaging");

	// without a configuration there is nothing further to attach
	if (this.configuration == null) {
		return scatterOp;
	}

	for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getScatterBcastVars()) {
		// f1 is the data set, f0 the name it is registered under
		scatterOp = scatterOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		scatterOp = scatterOp.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return scatterOp;
}

Example #2

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

5 votes

/**
 * Applies the shared settings to the coGroup operator that computes the vertex updates:
 * the operator name, the gather broadcast variables from the configuration (if any), and
 * the forwarded-field annotations for field "0" on both inputs.
 *
 * @param updates the coGroup operator to configure
 */
private <VVWithDegree> void configureUpdateFunction(CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updates) {

	CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updateOp = updates.name("Vertex State Updates");

	// attach every broadcast variable registered for the gather step
	if (this.configuration != null) {
		for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getGatherBcastVars()) {
			updateOp = updateOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
		}
	}

	// the key field (position 0) is preserved from both inputs
	updateOp.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
}

Example #3

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

5 votes

/**
 * Builds the scatter step for vertices that carry degree information (the vertex value is a
 * {@code Tuple3<VV, LongValue, LongValue>}) as a coGroup of the edges with the iteration
 * workset.
 *
 * <p>The resulting operator is named "Messaging". If a configuration is present, every
 * scatter broadcast variable registered in it is attached to the operator, and the vertex
 * count is additionally broadcast as "number of vertices" when that option is enabled.
 *
 * @param iteration the delta iteration whose workset is co-grouped with the edges
 * @param messageTypeInfo the type information handed to the scatter UDF
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the configured coGroup operator that produces the messages
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
		DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

	// co-group the edges with the current workset and name the resulting operator
	CoGroupOperator<?, ?, Tuple2<K, Message>> scatterOp = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf)
			.name("Messaging");

	// without a configuration there is nothing further to attach
	if (this.configuration == null) {
		return scatterOp;
	}

	for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getScatterBcastVars()) {
		// f1 is the data set, f0 the name it is registered under
		scatterOp = scatterOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		scatterOp = scatterOp.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return scatterOp;
}

Example #4

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

5 votes

/**
 * Builds the scatter step for simple vertices (vertices without degree information) as a
 * coGroup of the edges with the iteration workset.
 *
 * <p>The resulting operator is named "Messaging". If a configuration is present, every
 * scatter broadcast variable registered in it is attached to the operator, and the vertex
 * count is additionally broadcast as "number of vertices" when that option is enabled.
 *
 * @param iteration the delta iteration whose workset is co-grouped with the edges
 * @param messageTypeInfo the type information handed to the scatter UDF
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the configured coGroup operator that produces the messages
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
		DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

	// co-group the edges with the current workset and name the resulting operator
	CoGroupOperator<?, ?, Tuple2<K, Message>> scatterOp = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf)
			.name("Messaging");

	// without a configuration there is nothing further to attach
	if (this.configuration == null) {
		return scatterOp;
	}

	for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getScatterBcastVars()) {
		// f1 is the data set, f0 the name it is registered under
		scatterOp = scatterOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		scatterOp = scatterOp.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return scatterOp;
}

Example #5

Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0

5 votes

/**
 * Builds and executes the "Workset Connected Components" job: vertices and edges are read
 * from CSV, every vertex is paired with its own id, and a delta iteration repeatedly joins
 * the workset with the edges and co-groups the candidates with the solution set via
 * {@code MinIdAndUpdate}. The result is written as CSV to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// both inputs are space-delimited: vertices hold one Long field, edges two
	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// duplicate the single field so each vertex becomes a (f0, f0) pair
	DataSet<Tuple2<Long, Long>> labeledVertices = vertexIds
			.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> map(Tuple1<Long> vertex) throws Exception {
					return new Tuple2<>(vertex.f0, vertex.f0);
				}
			})
			.name("Assign Vertex Ids");

	// the labeled vertices serve as both the initial solution set and the initial workset
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			labeledVertices.iterateDelta(labeledVertices, MAX_ITERATIONS, 0);

	// emit (second field of the edge, second field of the workset entry) for every match
	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates =
			deltaIteration.getWorkset()
					.join(edgePairs)
					.where(0)
					.equalTo(0)
					.with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
						@Override
						public Tuple2<Long, Long> join(Tuple2<Long, Long> worksetEntry, Tuple2<Long, Long> edge) throws Exception {
							return new Tuple2<>(edge.f1, worksetEntry.f1);
						}
					})
					.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates =
			candidates
					.coGroup(deltaIteration.getSolutionSet())
					.where(0)
					.equalTo(0)
					.with(new MinIdAndUpdate())
					.name("min Id and Update");

	// the coGroup output is used as both the solution set delta and the next workset
	deltaIteration.closeWith(updates, updates)
			.writeAsCsv(resultPath, "\n", " ")
			.name("Result");

	env.execute("Workset Connected Components");
}

Example #6

Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0

5 votes

/**
 * Translates a {@code CoGroupByKey} transform into a Flink {@code CoGroupOperator} over the
 * first two keyed input collections, keyed on the "key" field of both inputs, and registers
 * the operator as the output data set of the transform.
 *
 * @param transform the transform to translate
 * @param context the translation context used to look up inputs and register the output
 */
@Override
public void translateNode(CoGroupByKey<K> transform, FlinkBatchTranslationContext context) {
	KeyedPCollectionTuple<K> keyedInput = context.getInput(transform);

	CoGbkResultSchema resultSchema = keyedInput.getCoGbkResultSchema();
	List<KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?>> taggedInputs = keyedInput.getKeyedCollections();

	// only the collections at index 0 and 1 take part in the coGroup
	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> firstTagged = taggedInputs.get(0);
	KeyedPCollectionTuple.TaggedKeyedPCollection<K, ?> secondTagged = taggedInputs.get(1);

	TupleTag<?> firstTag = firstTagged.getTupleTag();
	TupleTag<?> secondTag = secondTagged.getTupleTag();

	PCollection<? extends KV<K, ?>> firstCollection = firstTagged.getCollection();
	PCollection<? extends KV<K, ?>> secondCollection = secondTagged.getCollection();

	DataSet<KV<K,V1>> firstDataSet = context.getInputDataSet(firstCollection);
	DataSet<KV<K,V2>> secondDataSet = context.getInputDataSet(secondCollection);

	TypeInformation<KV<K,CoGbkResult>> outputType = context.getOutputTypeInfo();

	FlinkCoGroupKeyedListAggregator<K,V1,V2> coGroupFn =
			new FlinkCoGroupKeyedListAggregator<>(resultSchema, firstTag, secondTag);

	// both inputs are keyed on their "key" field
	Keys.ExpressionKeys<KV<K,V1>> firstKeys =
			new Keys.ExpressionKeys<>(new String[]{"key"}, firstDataSet.getType());
	Keys.ExpressionKeys<KV<K,V2>> secondKeys =
			new Keys.ExpressionKeys<>(new String[]{"key"}, secondDataSet.getType());

	DataSet<KV<K, CoGbkResult>> coGrouped = new CoGroupOperator<>(
			firstDataSet, secondDataSet,
			firstKeys, secondKeys,
			coGroupFn, outputType, null, transform.getName());

	context.setOutputDataSet(context.getOutput(transform), coGrouped);
}

Example #7

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

5 votes

/**
 * Applies the shared settings to the coGroup operator that computes the vertex updates:
 * the operator name, the gather broadcast variables from the configuration (if any), and
 * the forwarded-field annotations for field "0" on both inputs.
 *
 * @param updates the coGroup operator to configure
 */
private <VVWithDegree> void configureUpdateFunction(CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updates) {

	CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updateOp = updates.name("Vertex State Updates");

	// attach every broadcast variable registered for the gather step
	if (this.configuration != null) {
		for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getGatherBcastVars()) {
			updateOp = updateOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
		}
	}

	// the key field (position 0) is preserved from both inputs
	updateOp.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
}

Example #8

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

5 votes

/**
 * Builds the scatter step for vertices that carry degree information (the vertex value is a
 * {@code Tuple3<VV, LongValue, LongValue>}) as a coGroup of the edges with the iteration
 * workset.
 *
 * <p>The resulting operator is named "Messaging". If a configuration is present, every
 * scatter broadcast variable registered in it is attached to the operator, and the vertex
 * count is additionally broadcast as "number of vertices" when that option is enabled.
 *
 * @param iteration the delta iteration whose workset is co-grouped with the edges
 * @param messageTypeInfo the type information handed to the scatter UDF
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the configured coGroup operator that produces the messages
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
		DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

	// co-group the edges with the current workset and name the resulting operator
	CoGroupOperator<?, ?, Tuple2<K, Message>> scatterOp = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf)
			.name("Messaging");

	// without a configuration there is nothing further to attach
	if (this.configuration == null) {
		return scatterOp;
	}

	for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getScatterBcastVars()) {
		// f1 is the data set, f0 the name it is registered under
		scatterOp = scatterOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		scatterOp = scatterOp.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return scatterOp;
}

Example #9

Source File: CoGroupConnectedComponentsITCase.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Builds and executes the "Workset Connected Components" job: vertices and edges are read
 * from CSV, every vertex is paired with its own id, and a delta iteration repeatedly joins
 * the workset with the edges and co-groups the candidates with the solution set via
 * {@code MinIdAndUpdate}. The result is written as CSV to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// both inputs are space-delimited: vertices hold one Long field, edges two
	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// duplicate the single field so each vertex becomes a (f0, f0) pair
	DataSet<Tuple2<Long, Long>> labeledVertices = vertexIds
			.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> map(Tuple1<Long> vertex) throws Exception {
					return new Tuple2<>(vertex.f0, vertex.f0);
				}
			})
			.name("Assign Vertex Ids");

	// the labeled vertices serve as both the initial solution set and the initial workset
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			labeledVertices.iterateDelta(labeledVertices, MAX_ITERATIONS, 0);

	// emit (second field of the edge, second field of the workset entry) for every match
	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates =
			deltaIteration.getWorkset()
					.join(edgePairs)
					.where(0)
					.equalTo(0)
					.with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
						@Override
						public Tuple2<Long, Long> join(Tuple2<Long, Long> worksetEntry, Tuple2<Long, Long> edge) throws Exception {
							return new Tuple2<>(edge.f1, worksetEntry.f1);
						}
					})
					.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates =
			candidates
					.coGroup(deltaIteration.getSolutionSet())
					.where(0)
					.equalTo(0)
					.with(new MinIdAndUpdate())
					.name("min Id and Update");

	// the coGroup output is used as both the solution set delta and the next workset
	deltaIteration.closeWith(updates, updates)
			.writeAsCsv(resultPath, "\n", " ")
			.name("Result");

	env.execute("Workset Connected Components");
}

Example #10

Source File: CoGroupConnectedComponentsITCase.java From flink with Apache License 2.0

5 votes

/**
 * Builds and executes the "Workset Connected Components" job: vertices and edges are read
 * from CSV, every vertex is paired with its own id, and a delta iteration repeatedly joins
 * the workset with the edges and co-groups the candidates with the solution set via
 * {@code MinIdAndUpdate}. The result is written as CSV to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// both inputs are space-delimited: vertices hold one Long field, edges two
	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// duplicate the single field so each vertex becomes a (f0, f0) pair
	DataSet<Tuple2<Long, Long>> labeledVertices = vertexIds
			.map(new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> map(Tuple1<Long> vertex) throws Exception {
					return new Tuple2<>(vertex.f0, vertex.f0);
				}
			})
			.name("Assign Vertex Ids");

	// the labeled vertices serve as both the initial solution set and the initial workset
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			labeledVertices.iterateDelta(labeledVertices, MAX_ITERATIONS, 0);

	// emit (second field of the edge, second field of the workset entry) for every match
	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates =
			deltaIteration.getWorkset()
					.join(edgePairs)
					.where(0)
					.equalTo(0)
					.with(new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
						@Override
						public Tuple2<Long, Long> join(Tuple2<Long, Long> worksetEntry, Tuple2<Long, Long> edge) throws Exception {
							return new Tuple2<>(edge.f1, worksetEntry.f1);
						}
					})
					.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates =
			candidates
					.coGroup(deltaIteration.getSolutionSet())
					.where(0)
					.equalTo(0)
					.with(new MinIdAndUpdate())
					.name("min Id and Update");

	// the coGroup output is used as both the solution set delta and the next workset
	deltaIteration.closeWith(updates, updates)
			.writeAsCsv(resultPath, "\n", " ")
			.name("Result");

	env.execute("Workset Connected Components");
}

Example #11

Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Builds the scatter step for simple vertices (vertices without degree information) as a
 * coGroup of the edges with the iteration workset.
 *
 * <p>The resulting operator is named "Messaging". If a configuration is present, every
 * scatter broadcast variable registered in it is attached to the operator, and the vertex
 * count is additionally broadcast as "number of vertices" when that option is enabled.
 *
 * @param iteration the delta iteration whose workset is co-grouped with the edges
 * @param messageTypeInfo the type information handed to the scatter UDF
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the configured coGroup operator that produces the messages
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
		DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

	// co-group the edges with the current workset and name the resulting operator
	CoGroupOperator<?, ?, Tuple2<K, Message>> scatterOp = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf)
			.name("Messaging");

	// without a configuration there is nothing further to attach
	if (this.configuration == null) {
		return scatterOp;
	}

	for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getScatterBcastVars()) {
		// f1 is the data set, f0 the name it is registered under
		scatterOp = scatterOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		scatterOp = scatterOp.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return scatterOp;
}

Example #12

Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Builds the scatter step for vertices that carry degree information (the vertex value is a
 * {@code Tuple3<VV, LongValue, LongValue>}) as a coGroup of the edges with the iteration
 * workset.
 *
 * <p>The resulting operator is named "Messaging". If a configuration is present, every
 * scatter broadcast variable registered in it is attached to the operator, and the vertex
 * count is additionally broadcast as "number of vertices" when that option is enabled.
 *
 * @param iteration the delta iteration whose workset is co-grouped with the edges
 * @param messageTypeInfo the type information handed to the scatter UDF
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the configured coGroup operator that produces the messages
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
		DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

	// co-group the edges with the current workset and name the resulting operator
	CoGroupOperator<?, ?, Tuple2<K, Message>> scatterOp = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf)
			.name("Messaging");

	// without a configuration there is nothing further to attach
	if (this.configuration == null) {
		return scatterOp;
	}

	for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getScatterBcastVars()) {
		// f1 is the data set, f0 the name it is registered under
		scatterOp = scatterOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		scatterOp = scatterOp.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return scatterOp;
}

Example #13

Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Applies the shared settings to the coGroup operator that computes the vertex updates:
 * the operator name, the gather broadcast variables from the configuration (if any), and
 * the forwarded-field annotations for field "0" on both inputs.
 *
 * @param updates the coGroup operator to configure
 */
private <VVWithDegree> void configureUpdateFunction(CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updates) {

	CoGroupOperator<?, ?, Vertex<K, VVWithDegree>> updateOp = updates.name("Vertex State Updates");

	// attach every broadcast variable registered for the gather step
	if (this.configuration != null) {
		for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getGatherBcastVars()) {
			updateOp = updateOp.withBroadcastSet(bcastVar.f1, bcastVar.f0);
		}
	}

	// the key field (position 0) is preserved from both inputs
	updateOp.withForwardedFieldsFirst("0").withForwardedFieldsSecond("0");
}

Example #14

Source File: VertexCentricIteration.java From flink with Apache License 2.0

4 votes

/**
 * Builds the operator that represents this vertex-centric graph computation.
 *
 * <p>The Pregel iteration is expressed as a delta iteration:
 * the solution set holds the active vertices and the workset holds the messages that were
 * sent to vertices during the previous superstep. Initially, the workset contains a null
 * message for each vertex. At the start of a superstep the solution set is joined with the
 * workset, yielding tuples of vertex state and message (the vertex inbox). The compute UDF
 * runs as a coGroup between these vertices-with-inbox and the graph edges. Its output holds
 * both new vertex values and new messages, which subsequent flatMaps route to the solution
 * set delta and to the new workset, respectively.
 *
 * @return The operator that represents this vertex-centric graph computation.
 * @throws IllegalStateException if the initial vertices have not been set
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {
	if (this.initialVertices == null) {
		throw new IllegalStateException("The input data set has not been set.");
	}

	// type information for the key, the messages and the intermediate data sets
	TypeInformation<K> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
	TypeInformation<Tuple2<K, Message>> messageTypeInfo = new TupleTypeInfo<>(keyType, messageType);
	TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
	TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> computeOutputType =
		new EitherTypeInfo<>(vertexType, messageTypeInfo);
	TypeInformation<Either<NullValue, Message>> nullableMsgType =
		new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
	TypeInformation<Tuple2<K, Either<NullValue, Message>>> workSetType =
		new TupleTypeInfo<>(keyType, nullableMsgType);

	// the initial workset is derived from the initial vertices
	DataSet<Tuple2<K, Either<NullValue, Message>>> initialWorkSet = initialVertices
			.map(new InitializeWorkSet<K, VV, Message>())
			.returns(workSetType);

	final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
			initialVertices.iterateDelta(initialWorkSet, this.maximumNumberOfIterations, 0);
	setUpIteration(iteration);

	// vertex inbox: join the current vertex state with the incoming messages
	DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> verticesWithMsgs =
			iteration.getSolutionSet()
			.join(iteration.getWorkset())
			.where(0)
			.equalTo(0)
			.with(new AppendVertexState<>())
			.returns(new TupleTypeInfo<>(vertexType, nullableMsgType));

	VertexComputeUdf<K, VV, EV, Message> computeUdf =
		new VertexComputeUdf<>(computeFunction, computeOutputType);

	// superstep: coGroup the vertices-with-inbox with the edges
	CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstepComputation =
			verticesWithMsgs.coGroup(edgesWithValue)
			.where("f0.f0")
			.equalTo(0)
			.with(computeUdf);

	// solution set delta: the new vertex values projected out of the compute output
	DataSet<Vertex<K, VV>> solutionSetDelta = superstepComputation
		.flatMap(new ProjectNewVertexValue<>())
		.returns(vertexType);

	// inbox of each vertex for the next superstep: the messages projected out of the compute output
	DataSet<Tuple2<K, Either<NullValue, Message>>> allMessages = superstepComputation
		.flatMap(new ProjectMessages<>())
		.returns(workSetType);

	// the new workset is either the raw messages or, if a combiner was provided, the combined ones
	DataSet<Tuple2<K, Either<NullValue, Message>>> newWorkSet;
	if (combineFunction == null) {
		newWorkSet = allMessages;
	} else {
		MessageCombinerUdf<K, Message> combinerUdf =
			new MessageCombinerUdf<>(combineFunction, workSetType);

		newWorkSet = allMessages
				.groupBy(0)
				.reduceGroup(combinerUdf)
				.setCombinable(true);
	}

	// name the compute operator and attach the configured broadcast variables
	superstepComputation = superstepComputation.name("Compute Function");
	if (this.configuration != null) {
		for (Tuple2<String, DataSet<?>> bcastVar : this.configuration.getBcastVars()) {
			superstepComputation = superstepComputation.withBroadcastSet(bcastVar.f1, bcastVar.f0);
		}
	}

	return iteration.closeWith(solutionSetDelta, newWorkSet);
}

Example #15

Source File: CoGroupOperatorTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks the semantic properties of a coGroup that uses key selectors on both inputs and
 * declares forwarded fields explicitly via {@code withForwardedFieldsFirst("2;4->0")} and
 * {@code withForwardedFieldsSecond("0->4;1;1->3")}.
 *
 * <p>NOTE(review): the asserted source positions are the annotated positions plus two;
 * presumably an effect of the key-selector wrapping - confirm against the operator.
 */
@Test
public void testSemanticPropsWithKeySelector2() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	CoGroupOperator<?, ?, ?> coGroupOp = tupleDs1.coGroup(tupleDs2)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestCoGroupFunction2())
			.withForwardedFieldsFirst("2;4->0")
			.withForwardedFieldsSecond("0->4;1;1->3");

	SemanticProperties semProps = coGroupOp.getSemanticProperties();

	// first input: positions 0..3 and 5 forward nothing, 4 -> {2}, 6 -> {0}
	for (int field = 0; field <= 3; field++) {
		assertTrue(semProps.getForwardingTargetFields(0, field).size() == 0);
	}
	assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
	assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 6).contains(0));

	// second input: positions 0, 1 and 4..6 forward nothing, 2 -> {4}, 3 -> {1, 3}
	for (int field = 0; field <= 1; field++) {
		assertTrue(semProps.getForwardingTargetFields(1, field).size() == 0);
	}
	assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(1, 3).contains(1));
	assertTrue(semProps.getForwardingTargetFields(1, 3).contains(3));
	for (int field = 4; field <= 6; field++) {
		assertTrue(semProps.getForwardingTargetFields(1, field).size() == 0);
	}

	// read fields: exactly {2, 3, 4} on the first input, none recorded for the second
	assertTrue(semProps.getReadFields(0).size() == 3);
	for (int field = 2; field <= 4; field++) {
		assertTrue(semProps.getReadFields(0).contains(field));
	}

	assertTrue(semProps.getReadFields(1) == null);
}

Example #16

Source File: ScatterGatherIteration.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Creates the operator that represents this scatter-gather graph computation for a simple vertex.
 *
 * <p>A delta iteration is opened over the initial vertices, the scatter coGroup(s) are built
 * for the requested messaging direction, and the resulting messages are co-grouped with the
 * solution set to compute the vertex updates that close the iteration.
 *
 * @param messagingDirection the edge direction along which messages are sent
 * @param messageTypeInfo the type information passed on to the scatter function builder
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the operator
 * @throws IllegalArgumentException if the messaging direction is not IN, OUT or ALL
 */
private DataSet<Vertex<K, VV>> createResultSimpleVertex(EdgeDirection messagingDirection,
	TypeInformation<Tuple2<K, Message>> messageTypeInfo, DataSet<LongValue> numberOfVertices) {

	TypeInformation<Vertex<K, VV>> vertexTypes = initialVertices.getType();

	// the initial vertices serve as both the initial solution set and the initial workset
	final DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration =
			initialVertices.iterateDelta(initialVertices, this.maximumNumberOfIterations, 0);
	setUpIteration(iteration);

	// the where-argument selects the edge field used as key: 1 for IN, 0 for OUT, both for ALL
	DataSet<Tuple2<K, Message>> messages;
	switch (messagingDirection) {
		case IN:
			messages = buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
			break;
		case OUT:
			messages = buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
			break;
		case ALL: {
			CoGroupOperator<?, ?, Tuple2<K, Message>> inMessages =
					buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
			CoGroupOperator<?, ?, Tuple2<K, Message>> outMessages =
					buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
			messages = inMessages.union(outMessages);
			break;
		}
		default:
			throw new IllegalArgumentException("Illegal edge direction");
	}

	GatherUdf<K, VV, Message> gatherUdf = new GatherUdfSimpleVV<>(gatherFunction, vertexTypes);

	// gather step: co-group the messages with the solution set
	CoGroupOperator<?, ?, Vertex<K, VV>> updates = messages
			.coGroup(iteration.getSolutionSet())
			.where(0)
			.equalTo(0)
			.with(gatherUdf);

	if (this.configuration != null && this.configuration.isOptNumVertices()) {
		updates = updates.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	configureUpdateFunction(updates);

	// the updates are used as both the solution set delta and the next workset
	return iteration.closeWith(updates, updates);
}

Example #17

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

4 votes

/**
 * Creates the operator that represents this scatter-gather graph computation for a simple vertex.
 *
 * <p>A delta iteration is opened over the initial vertices, the scatter coGroup(s) are built
 * for the requested messaging direction, and the resulting messages are co-grouped with the
 * solution set to compute the vertex updates that close the iteration.
 *
 * @param messagingDirection the edge direction along which messages are sent
 * @param messageTypeInfo the type information passed on to the scatter function builder
 * @param numberOfVertices the data set broadcast as "number of vertices" when enabled
 * @return the operator
 * @throws IllegalArgumentException if the messaging direction is not IN, OUT or ALL
 */
private DataSet<Vertex<K, VV>> createResultSimpleVertex(EdgeDirection messagingDirection,
	TypeInformation<Tuple2<K, Message>> messageTypeInfo, DataSet<LongValue> numberOfVertices) {

	TypeInformation<Vertex<K, VV>> vertexTypes = initialVertices.getType();

	// the initial vertices serve as both the initial solution set and the initial workset
	final DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration =
			initialVertices.iterateDelta(initialVertices, this.maximumNumberOfIterations, 0);
	setUpIteration(iteration);

	// the where-argument selects the edge field used as key: 1 for IN, 0 for OUT, both for ALL
	DataSet<Tuple2<K, Message>> messages;
	switch (messagingDirection) {
		case IN:
			messages = buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
			break;
		case OUT:
			messages = buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
			break;
		case ALL: {
			CoGroupOperator<?, ?, Tuple2<K, Message>> inMessages =
					buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
			CoGroupOperator<?, ?, Tuple2<K, Message>> outMessages =
					buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
			messages = inMessages.union(outMessages);
			break;
		}
		default:
			throw new IllegalArgumentException("Illegal edge direction");
	}

	GatherUdf<K, VV, Message> gatherUdf = new GatherUdfSimpleVV<>(gatherFunction, vertexTypes);

	// gather step: co-group the messages with the solution set
	CoGroupOperator<?, ?, Vertex<K, VV>> updates = messages
			.coGroup(iteration.getSolutionSet())
			.where(0)
			.equalTo(0)
			.with(gatherUdf);

	if (this.configuration != null && this.configuration.isOptNumVertices()) {
		updates = updates.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	configureUpdateFunction(updates);

	// the updates are used as both the solution set delta and the next workset
	return iteration.closeWith(updates, updates);
}

Example #18

Source File: CoGroupOperatorTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks the semantic properties of a coGroup that uses key selectors on both inputs and
 * sets no forwarded fields on the operator itself; the expected forwarding and read fields
 * therefore have to come from {@code DummyTestCoGroupFunction1}.
 */
@Test
public void testSemanticPropsWithKeySelector1() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	CoGroupOperator<?, ?, ?> coGroupOp = tupleDs1.coGroup(tupleDs2)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestCoGroupFunction1());

	SemanticProperties semProps = coGroupOp.getSemanticProperties();

	// first input: positions 0, 1 and 4..6 forward nothing, 2 -> {4}, 3 -> {1, 3}
	for (int field = 0; field <= 1; field++) {
		assertTrue(semProps.getForwardingTargetFields(0, field).size() == 0);
	}
	assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
	assertTrue(semProps.getForwardingTargetFields(0, 3).contains(3));
	for (int field = 4; field <= 6; field++) {
		assertTrue(semProps.getForwardingTargetFields(0, field).size() == 0);
	}

	// second input: positions 0..3 and 5 forward nothing, 4 -> {2}, 6 -> {0}
	for (int field = 0; field <= 3; field++) {
		assertTrue(semProps.getForwardingTargetFields(1, field).size() == 0);
	}
	assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 4).contains(2));
	assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 6).contains(0));

	// read fields: exactly {2, 4, 6} on the first input and {3, 5} on the second
	assertTrue(semProps.getReadFields(0).size() == 3);
	for (int field = 2; field <= 6; field += 2) {
		assertTrue(semProps.getReadFields(0).contains(field));
	}

	assertTrue(semProps.getReadFields(1).size() == 2);
	for (int field = 3; field <= 5; field += 2) {
		assertTrue(semProps.getReadFields(1).contains(field));
	}
}

Example #19

Source File: VertexCentricIteration.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Builds the delta-iteration data flow for this vertex-centric graph computation.
 *
 * <p>The Pregel model is expressed as a delta iteration in the following way:
 * <ul>
 *   <li>The solution set holds the active vertices, the workset holds the messages that were
 *       sent to vertices during the previous superstep. The initial workset carries a null
 *       message for every vertex.</li>
 *   <li>At the start of a superstep the solution set is joined with the workset, yielding
 *       tuples of vertex state and message (the vertex inbox).</li>
 *   <li>The compute UDF runs as a coGroup between these inboxes and the graph edges. Its
 *       output mixes new vertex values and newly produced messages.</li>
 *   <li>Two flatMaps split that output into the solution set delta and the next workset.</li>
 * </ul>
 *
 * @return The operator that represents this vertex-centric graph computation.
 * @throws IllegalStateException if the initial vertices have not been set.
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {
	if (this.initialVertices == null) {
		throw new IllegalStateException("The input data set has not been set.");
	}

	// type information for keys, messages, and the intermediate data sets
	TypeInformation<K> vertexIdType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
	TypeInformation<Tuple2<K, Message>> idWithMessageType = new TupleTypeInfo<>(vertexIdType, messageType);
	TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
	TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> computeOutputType =
		new EitherTypeInfo<>(vertexType, idWithMessageType);
	TypeInformation<Either<NullValue, Message>> optionalMessageType =
		new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
	TypeInformation<Tuple2<K, Either<NullValue, Message>>> worksetType =
		new TupleTypeInfo<>(vertexIdType, optionalMessageType);

	// the first workset is derived from the initial vertices
	DataSet<Tuple2<K, Either<NullValue, Message>>> startWorkset = initialVertices
		.map(new InitializeWorkSet<K, VV, Message>())
		.returns(worksetType);

	final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
		initialVertices.iterateDelta(startWorkset, this.maximumNumberOfIterations, 0);
	setUpIteration(iteration);

	// attach the current vertex state to every message (join on field 0 of both sides)
	DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> vertexInboxes =
		iteration.getSolutionSet()
			.join(iteration.getWorkset())
			.where(0)
			.equalTo(0)
			.with(new AppendVertexState<>())
			.returns(new TupleTypeInfo<>(vertexType, optionalMessageType));

	VertexComputeUdf<K, VV, EV, Message> computeUdf =
		new VertexComputeUdf<>(computeFunction, computeOutputType);

	// one superstep: coGroup the inboxes (keyed on "f0.f0") with the edges (keyed on field 0)
	CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstep =
		vertexInboxes.coGroup(edgesWithValue)
			.where("f0.f0")
			.equalTo(0)
			.with(computeUdf);

	// new vertex values form the solution set delta
	DataSet<Vertex<K, VV>> vertexUpdates =
		superstep.flatMap(new ProjectNewVertexValue<>()).returns(vertexType);

	// produced messages form the inbox of the next superstep (the new workset)
	DataSet<Tuple2<K, Either<NullValue, Message>>> outgoingMessages =
		superstep.flatMap(new ProjectMessages<>()).returns(worksetType);

	// without a combiner the messages are used as they are; otherwise they are
	// grouped on field 0 and reduced by the combiner UDF first
	final DataSet<Tuple2<K, Either<NullValue, Message>>> nextWorkset;
	if (combineFunction == null) {
		nextWorkset = outgoingMessages;
	} else {
		MessageCombinerUdf<K, Message> messageCombiner =
			new MessageCombinerUdf<>(combineFunction, worksetType);

		nextWorkset = outgoingMessages
			.groupBy(0)
			.reduceGroup(messageCombiner)
			.setCombinable(true);
	}

	// name the compute operator and register the user-configured broadcast sets on it
	superstep = superstep.name("Compute Function");
	if (this.configuration != null) {
		for (Tuple2<String, DataSet<?>> broadcastVar : this.configuration.getBcastVars()) {
			superstep = superstep.withBroadcastSet(broadcastVar.f1, broadcastVar.f0);
		}
	}

	return iteration.closeWith(vertexUpdates, nextWorkset);
}

Example #20

Source File: ScatterGatherIteration.java From flink with Apache License 2.0

4 votes

/**
 * Creates the operator that represents this scatter-gather graph computation for a simple vertex.
 *
 * <p>The scatter step is built with {@code buildScatterFunction}; the gather step is a coGroup of
 * the produced messages with the solution set on field 0 of both sides. The result of the gather
 * step closes the delta iteration as both the solution set delta and the next workset.
 *
 * @param messagingDirection selects the coGroup key position(s) used when building the scatter
 *        function: {@code IN} uses (1, 0), {@code OUT} uses (0, 0), {@code ALL} unions both
 * @param messageTypeInfo type information of the messages, handed to the scatter function
 * @param numberOfVertices data set handed to the scatter function and, if the configuration has
 *        the number-of-vertices option enabled, broadcast to the gather step as
 *        {@code "number of vertices"}
 * @return the operator
 * @throws IllegalArgumentException if the direction is none of {@code IN}, {@code OUT}, {@code ALL}
 */
private DataSet<Vertex<K, VV>> createResultSimpleVertex(EdgeDirection messagingDirection,
	TypeInformation<Tuple2<K, Message>> messageTypeInfo, DataSet<LongValue> numberOfVertices) {

	DataSet<Tuple2<K, Message>> scattered;

	TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();

	// the initial vertices serve as both the solution set and the first workset
	final DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration =
		initialVertices.iterateDelta(initialVertices, this.maximumNumberOfIterations, 0);
	setUpIteration(iteration);

	// scatter phase: the direction decides which key positions the coGroup uses
	switch (messagingDirection) {
		case IN:
			scattered = buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
			break;
		case OUT:
			scattered = buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
			break;
		case ALL:
			DataSet<Tuple2<K, Message>> viaFieldOne =
				buildScatterFunction(iteration, messageTypeInfo, 1, 0, numberOfVertices);
			DataSet<Tuple2<K, Message>> viaFieldZero =
				buildScatterFunction(iteration, messageTypeInfo, 0, 0, numberOfVertices);
			scattered = viaFieldOne.union(viaFieldZero);
			break;
		default:
			throw new IllegalArgumentException("Illegal edge direction");
	}

	GatherUdf<K, VV, Message> gatherUdf = new GatherUdfSimpleVV<>(gatherFunction, vertexType);

	// gather phase: coGroup the messages with the solution set on field 0 of both inputs
	CoGroupOperator<?, ?, Vertex<K, VV>> gathered = scattered
		.coGroup(iteration.getSolutionSet())
		.where(0)
		.equalTo(0)
		.with(gatherUdf);

	boolean broadcastVertexCount = this.configuration != null && this.configuration.isOptNumVertices();
	if (broadcastVertexCount) {
		gathered = gathered.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	configureUpdateFunction(gathered);

	// the gather output is both the solution set delta and the next workset
	return iteration.closeWith(gathered, gathered);
}

Example #21

Source File: CoGroupOperatorTest.java From flink with Apache License 2.0

4 votes

/**
 * Verifies the semantic properties of a coGroup that is keyed with key selectors on both inputs
 * and uses {@code DummyTestCoGroupFunction1}, with no forwarded fields declared on the operator.
 */
// NOTE(review): the expected values presumably come from annotations on
// DummyTestCoGroupFunction1, which is not visible here - confirm.
@Test
public void testSemanticPropsWithKeySelector1() {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> firstInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> secondInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	CoGroupOperator<?, ?, ?> operator = firstInput.coGroup(secondInput)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestCoGroupFunction1());

	SemanticProperties props = operator.getSemanticProperties();

	// forwarded fields of input 0: 2 -> {4}, 3 -> {1, 3}, nothing else
	assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
	assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
	assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
	assertTrue(props.getForwardingTargetFields(0, 3).contains(3));
	assertTrue(props.getForwardingTargetFields(0, 4).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 6).size() == 0);

	// forwarded fields of input 1: 4 -> {2}, 6 -> {0}, nothing else
	assertTrue(props.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 2).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 3).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 4).contains(2));
	assertTrue(props.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 6).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 6).contains(0));

	// read fields of input 0: exactly {2, 4, 6}
	assertTrue(props.getReadFields(0).size() == 3);
	assertTrue(props.getReadFields(0).contains(2));
	assertTrue(props.getReadFields(0).contains(4));
	assertTrue(props.getReadFields(0).contains(6));

	// read fields of input 1: exactly {3, 5}
	assertTrue(props.getReadFields(1).size() == 2);
	assertTrue(props.getReadFields(1).contains(3));
	assertTrue(props.getReadFields(1).contains(5));
}

Example #22

Source File: CoGroupOperatorTest.java From flink with Apache License 2.0

4 votes

/**
 * Verifies the semantic properties of a coGroup that is keyed with key selectors on both inputs
 * and declares its forwarded fields on the operator via {@code withForwardedFieldsFirst} and
 * {@code withForwardedFieldsSecond}.
 *
 * <p>The asserted source positions are the declared ones shifted by two, while the target
 * positions are unchanged (e.g. the declaration {@code 4->0} is expected as 6 -> {0}).
 */
// NOTE(review): the shift by two presumably stems from the key-selector key fields - confirm.
@Test
public void testSemanticPropsWithKeySelector2() {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> firstInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> secondInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	CoGroupOperator<?, ?, ?> operator = firstInput.coGroup(secondInput)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestCoGroupFunction2())
			.withForwardedFieldsFirst("2;4->0")
			.withForwardedFieldsSecond("0->4;1;1->3");

	SemanticProperties props = operator.getSemanticProperties();

	// forwarded fields of input 0: 4 -> {2}, 6 -> {0}, nothing else
	assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 2).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 3).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 4).contains(2));
	assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 6).contains(0));

	// forwarded fields of input 1: 2 -> {4}, 3 -> {1, 3}, nothing else
	assertTrue(props.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 2).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 2).contains(4));
	assertTrue(props.getForwardingTargetFields(1, 3).size() == 2);
	assertTrue(props.getForwardingTargetFields(1, 3).contains(1));
	assertTrue(props.getForwardingTargetFields(1, 3).contains(3));
	assertTrue(props.getForwardingTargetFields(1, 4).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 6).size() == 0);

	// read fields of input 0: exactly {2, 3, 4}
	assertTrue(props.getReadFields(0).size() == 3);
	assertTrue(props.getReadFields(0).contains(2));
	assertTrue(props.getReadFields(0).contains(3));
	assertTrue(props.getReadFields(0).contains(4));

	// no read-field information is expected for input 1
	assertTrue(props.getReadFields(1) == null);
}

Example #23

Source File: VertexCentricIteration.java From flink with Apache License 2.0

4 votes

/**
 * Builds the delta-iteration data flow for this vertex-centric graph computation.
 *
 * <p>The Pregel model is expressed as a delta iteration in the following way:
 * <ul>
 *   <li>The solution set holds the active vertices, the workset holds the messages that were
 *       sent to vertices during the previous superstep. The initial workset carries a null
 *       message for every vertex.</li>
 *   <li>At the start of a superstep the solution set is joined with the workset, yielding
 *       tuples of vertex state and message (the vertex inbox).</li>
 *   <li>The compute UDF runs as a coGroup between these inboxes and the graph edges. Its
 *       output mixes new vertex values and newly produced messages.</li>
 *   <li>Two flatMaps split that output into the solution set delta and the next workset.</li>
 * </ul>
 *
 * @return The operator that represents this vertex-centric graph computation.
 * @throws IllegalStateException if the initial vertices have not been set.
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {
	if (this.initialVertices == null) {
		throw new IllegalStateException("The input data set has not been set.");
	}

	// type information for keys, messages, and the intermediate data sets
	TypeInformation<K> vertexIdType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
	TypeInformation<Tuple2<K, Message>> idWithMessageType = new TupleTypeInfo<>(vertexIdType, messageType);
	TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
	TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> computeOutputType =
		new EitherTypeInfo<>(vertexType, idWithMessageType);
	TypeInformation<Either<NullValue, Message>> optionalMessageType =
		new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
	TypeInformation<Tuple2<K, Either<NullValue, Message>>> worksetType =
		new TupleTypeInfo<>(vertexIdType, optionalMessageType);

	// the first workset is derived from the initial vertices
	DataSet<Tuple2<K, Either<NullValue, Message>>> startWorkset = initialVertices
		.map(new InitializeWorkSet<K, VV, Message>())
		.returns(worksetType);

	final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
		initialVertices.iterateDelta(startWorkset, this.maximumNumberOfIterations, 0);
	setUpIteration(iteration);

	// attach the current vertex state to every message (join on field 0 of both sides)
	DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> vertexInboxes =
		iteration.getSolutionSet()
			.join(iteration.getWorkset())
			.where(0)
			.equalTo(0)
			.with(new AppendVertexState<>())
			.returns(new TupleTypeInfo<>(vertexType, optionalMessageType));

	VertexComputeUdf<K, VV, EV, Message> computeUdf =
		new VertexComputeUdf<>(computeFunction, computeOutputType);

	// one superstep: coGroup the inboxes (keyed on "f0.f0") with the edges (keyed on field 0)
	CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstep =
		vertexInboxes.coGroup(edgesWithValue)
			.where("f0.f0")
			.equalTo(0)
			.with(computeUdf);

	// new vertex values form the solution set delta
	DataSet<Vertex<K, VV>> vertexUpdates =
		superstep.flatMap(new ProjectNewVertexValue<>()).returns(vertexType);

	// produced messages form the inbox of the next superstep (the new workset)
	DataSet<Tuple2<K, Either<NullValue, Message>>> outgoingMessages =
		superstep.flatMap(new ProjectMessages<>()).returns(worksetType);

	// without a combiner the messages are used as they are; otherwise they are
	// grouped on field 0 and reduced by the combiner UDF first
	final DataSet<Tuple2<K, Either<NullValue, Message>>> nextWorkset;
	if (combineFunction == null) {
		nextWorkset = outgoingMessages;
	} else {
		MessageCombinerUdf<K, Message> messageCombiner =
			new MessageCombinerUdf<>(combineFunction, worksetType);

		nextWorkset = outgoingMessages
			.groupBy(0)
			.reduceGroup(messageCombiner)
			.setCombinable(true);
	}

	// name the compute operator and register the user-configured broadcast sets on it
	superstep = superstep.name("Compute Function");
	if (this.configuration != null) {
		for (Tuple2<String, DataSet<?>> broadcastVar : this.configuration.getBcastVars()) {
			superstep = superstep.withBroadcastSet(broadcastVar.f1, broadcastVar.f0);
		}
	}

	return iteration.closeWith(vertexUpdates, nextWorkset);
}

Example #24

Source File: CoGroupOperatorTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Verifies the semantic properties of a coGroup that is keyed with key selectors on both inputs
 * and declares its forwarded fields on the operator via {@code withForwardedFieldsFirst} and
 * {@code withForwardedFieldsSecond}.
 *
 * <p>The asserted source positions are the declared ones shifted by two, while the target
 * positions are unchanged (e.g. the declaration {@code 4->0} is expected as 6 -> {0}).
 */
// NOTE(review): the shift by two presumably stems from the key-selector key fields - confirm.
@Test
public void testSemanticPropsWithKeySelector2() {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> firstInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> secondInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	CoGroupOperator<?, ?, ?> operator = firstInput.coGroup(secondInput)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestCoGroupFunction2())
			.withForwardedFieldsFirst("2;4->0")
			.withForwardedFieldsSecond("0->4;1;1->3");

	SemanticProperties props = operator.getSemanticProperties();

	// forwarded fields of input 0: 4 -> {2}, 6 -> {0}, nothing else
	assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 2).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 3).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 4).contains(2));
	assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 6).contains(0));

	// forwarded fields of input 1: 2 -> {4}, 3 -> {1, 3}, nothing else
	assertTrue(props.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 2).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 2).contains(4));
	assertTrue(props.getForwardingTargetFields(1, 3).size() == 2);
	assertTrue(props.getForwardingTargetFields(1, 3).contains(1));
	assertTrue(props.getForwardingTargetFields(1, 3).contains(3));
	assertTrue(props.getForwardingTargetFields(1, 4).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 6).size() == 0);

	// read fields of input 0: exactly {2, 3, 4}
	assertTrue(props.getReadFields(0).size() == 3);
	assertTrue(props.getReadFields(0).contains(2));
	assertTrue(props.getReadFields(0).contains(3));
	assertTrue(props.getReadFields(0).contains(4));

	// no read-field information is expected for input 1
	assertTrue(props.getReadFields(1) == null);
}

Example #25

Source File: CoGroupOperatorTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Verifies the semantic properties of a coGroup that is keyed with key selectors on both inputs
 * and uses {@code DummyTestCoGroupFunction1}, with no forwarded fields declared on the operator.
 */
// NOTE(review): the expected values presumably come from annotations on
// DummyTestCoGroupFunction1, which is not visible here - confirm.
@Test
public void testSemanticPropsWithKeySelector1() {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> firstInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> secondInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	CoGroupOperator<?, ?, ?> operator = firstInput.coGroup(secondInput)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestCoGroupFunction1());

	SemanticProperties props = operator.getSemanticProperties();

	// forwarded fields of input 0: 2 -> {4}, 3 -> {1, 3}, nothing else
	assertTrue(props.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
	assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
	assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
	assertTrue(props.getForwardingTargetFields(0, 3).contains(3));
	assertTrue(props.getForwardingTargetFields(0, 4).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(0, 6).size() == 0);

	// forwarded fields of input 1: 4 -> {2}, 6 -> {0}, nothing else
	assertTrue(props.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 2).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 3).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 4).contains(2));
	assertTrue(props.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(props.getForwardingTargetFields(1, 6).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 6).contains(0));

	// read fields of input 0: exactly {2, 4, 6}
	assertTrue(props.getReadFields(0).size() == 3);
	assertTrue(props.getReadFields(0).contains(2));
	assertTrue(props.getReadFields(0).contains(4));
	assertTrue(props.getReadFields(0).contains(6));

	// read fields of input 1: exactly {3, 5}
	assertTrue(props.getReadFields(1).size() == 2);
	assertTrue(props.getReadFields(1).contains(3));
	assertTrue(props.getReadFields(1).contains(5));
}

Example #26

Source File: DataSet.java From Flink-CEPplus with Apache License 2.0

2 votes

/**
 * Starts the definition of a CoGroup transformation between this DataSet and another one.
 *
 * <p>A CoGroup transformation combines the elements of two {@link DataSet DataSets} into one
 * DataSet. Each DataSet is grouped individually on a key, and groups of both DataSets that share
 * the same key are handed together to a
 * {@link org.apache.flink.api.common.functions.RichCoGroupFunction}. When a group has no
 * counterpart with the same key in the other DataSet, the CoGroupFunction is called with an
 * empty group in place of the missing one.
 *
 * <p>The CoGroupFunction can iterate over the elements of both groups and may return any number
 * of elements, including none.
 *
 * <p>The returned {@link CoGroupOperator.CoGroupOperatorSets} offers the {@code where} methods,
 * one of which must be called next to define the grouping key of the first (i.e., this) DataSet.
 *
 * @param other The other DataSet of the CoGroup transformation.
 * @param <R> The element type of the other DataSet.
 * @return A CoGroupOperatorSets to continue the definition of the CoGroup transformation.
 *
 * @see CoGroupOperator.CoGroupOperatorSets
 * @see CoGroupOperator
 * @see DataSet
 */
public <R> CoGroupOperator.CoGroupOperatorSets<T, R> coGroup(DataSet<R> other) {
	// this DataSet is the first input, the argument the second
	CoGroupOperator.CoGroupOperatorSets<T, R> inputPair =
		new CoGroupOperator.CoGroupOperatorSets<>(this, other);
	return inputPair;
}

Example #27

Source File: DataSet.java From flink with Apache License 2.0

2 votes

/**
 * Starts the definition of a CoGroup transformation between this DataSet and another one.
 *
 * <p>A CoGroup transformation combines the elements of two {@link DataSet DataSets} into one
 * DataSet. Each DataSet is grouped individually on a key, and groups of both DataSets that share
 * the same key are handed together to a
 * {@link org.apache.flink.api.common.functions.RichCoGroupFunction}. When a group has no
 * counterpart with the same key in the other DataSet, the CoGroupFunction is called with an
 * empty group in place of the missing one.
 *
 * <p>The CoGroupFunction can iterate over the elements of both groups and may return any number
 * of elements, including none.
 *
 * <p>The returned {@link CoGroupOperator.CoGroupOperatorSets} offers the {@code where} methods,
 * one of which must be called next to define the grouping key of the first (i.e., this) DataSet.
 *
 * @param other The other DataSet of the CoGroup transformation.
 * @param <R> The element type of the other DataSet.
 * @return A CoGroupOperatorSets to continue the definition of the CoGroup transformation.
 *
 * @see CoGroupOperator.CoGroupOperatorSets
 * @see CoGroupOperator
 * @see DataSet
 */
public <R> CoGroupOperator.CoGroupOperatorSets<T, R> coGroup(DataSet<R> other) {
	// this DataSet is the first input, the argument the second
	CoGroupOperator.CoGroupOperatorSets<T, R> inputPair =
		new CoGroupOperator.CoGroupOperatorSets<>(this, other);
	return inputPair;
}

Example #28

Source File: DataSet.java From flink with Apache License 2.0

2 votes

/**
 * Starts the definition of a CoGroup transformation between this DataSet and another one.
 *
 * <p>A CoGroup transformation combines the elements of two {@link DataSet DataSets} into one
 * DataSet. Each DataSet is grouped individually on a key, and groups of both DataSets that share
 * the same key are handed together to a
 * {@link org.apache.flink.api.common.functions.RichCoGroupFunction}. When a group has no
 * counterpart with the same key in the other DataSet, the CoGroupFunction is called with an
 * empty group in place of the missing one.
 *
 * <p>The CoGroupFunction can iterate over the elements of both groups and may return any number
 * of elements, including none.
 *
 * <p>The returned {@link CoGroupOperator.CoGroupOperatorSets} offers the {@code where} methods,
 * one of which must be called next to define the grouping key of the first (i.e., this) DataSet.
 *
 * @param other The other DataSet of the CoGroup transformation.
 * @param <R> The element type of the other DataSet.
 * @return A CoGroupOperatorSets to continue the definition of the CoGroup transformation.
 *
 * @see CoGroupOperator.CoGroupOperatorSets
 * @see CoGroupOperator
 * @see DataSet
 */
public <R> CoGroupOperator.CoGroupOperatorSets<T, R> coGroup(DataSet<R> other) {
	// this DataSet is the first input, the argument the second
	CoGroupOperator.CoGroupOperatorSets<T, R> inputPair =
		new CoGroupOperator.CoGroupOperatorSets<>(this, other);
	return inputPair;
}