Java Code Examples for org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint#BROADCAST_HASH_SECOND

The following examples show how to use org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint#BROADCAST_HASH_SECOND. You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: MaximumDegree.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a new graph from {@code input} by joining its vertices and edges against the
 * set of vertices whose degree passes the {@code DegreeFilter} configured with
 * {@code maximumDegree}.
 *
 * <p>Vertices are joined once on their ID; edges are joined twice, once per endpoint.
 * All operators are run with the configured {@code parallelism}.
 *
 * @param input the graph to process
 * @return a graph assembled from the projected vertices and edges, using the context of {@code input}
 * @throws Exception declared by the signature; propagated from the underlying calls
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// The direction flag drives both the degree computation and the order of the edge join keys.
	final boolean onTarget = reduceOnTargetId.get();
	final int firstEdgeKey = onTarget ? 1 : 0;
	final int secondEdgeKey = onTarget ? 0 : 1;
	final String firstEndpoint = onTarget ? "target" : "source";
	final String secondEndpoint = onTarget ? "source" : "target";

	// (u, d(u)) for every vertex u
	DataSet<Vertex<K, LongValue>> vertexDegree = input.run(
		new VertexDegree<K, VV, EV>()
			.setReduceOnTargetId(onTarget)
			.setParallelism(parallelism));

	// IDs of the vertices emitted by the degree filter
	DataSet<Tuple1<K>> highDegreeVertices = vertexDegree
		.flatMap(new DegreeFilter<>(maximumDegree))
		.setParallelism(parallelism)
		.name("Filter high-degree vertices");

	// Choose how the high-degree vertex set is shipped to the joins.
	final JoinHint joinHint;
	if (broadcastHighDegreeVertices.get()) {
		joinHint = JoinHint.BROADCAST_HASH_SECOND;
	} else {
		joinHint = JoinHint.REPARTITION_HASH_SECOND;
	}

	// Vertices: left outer join on the vertex ID, then project.
	DataSet<Vertex<K, VV>> vertices = input.getVertices()
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(0)
		.equalTo(0)
		.with(new ProjectVertex<>())
		.setParallelism(parallelism)
		.name("Project low-degree vertices");

	// Edges, first pass: join on the first endpoint.
	DataSet<Edge<K, EV>> edgesAfterFirstPass = input.getEdges()
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(firstEdgeKey)
		.equalTo(0)
		.with(new ProjectEdge<>())
		.setParallelism(parallelism)
		.name("Project low-degree edges by " + firstEndpoint);

	// Edges, second pass: join the remaining edges on the opposite endpoint.
	DataSet<Edge<K, EV>> edges = edgesAfterFirstPass
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(secondEdgeKey)
		.equalTo(0)
		.with(new ProjectEdge<>())
		.setParallelism(parallelism)
		.name("Project low-degree edges by " + secondEndpoint);

	// Assemble the resulting graph.
	return Graph.fromDataSet(vertices, edges, input.getContext());
}
 
Example 2
Source File: MaximumDegree.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a new graph from {@code input} by joining its vertices and edges against the
 * set of vertices whose degree passes the {@code DegreeFilter} configured with
 * {@code maximumDegree}.
 *
 * <p>Vertices are joined once on their ID; edges are joined twice, once per endpoint.
 * All operators are run with the configured {@code parallelism}.
 *
 * @param input the graph to process
 * @return a graph assembled from the projected vertices and edges, using the context of {@code input}
 * @throws Exception declared by the signature; propagated from the underlying calls
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// The direction flag drives both the degree computation and the order of the edge join keys.
	final boolean onTarget = reduceOnTargetId.get();
	final int firstEdgeKey = onTarget ? 1 : 0;
	final int secondEdgeKey = onTarget ? 0 : 1;
	final String firstEndpoint = onTarget ? "target" : "source";
	final String secondEndpoint = onTarget ? "source" : "target";

	// (u, d(u)) for every vertex u
	DataSet<Vertex<K, LongValue>> vertexDegree = input.run(
		new VertexDegree<K, VV, EV>()
			.setReduceOnTargetId(onTarget)
			.setParallelism(parallelism));

	// IDs of the vertices emitted by the degree filter
	DataSet<Tuple1<K>> highDegreeVertices = vertexDegree
		.flatMap(new DegreeFilter<>(maximumDegree))
		.setParallelism(parallelism)
		.name("Filter high-degree vertices");

	// Choose how the high-degree vertex set is shipped to the joins.
	final JoinHint joinHint;
	if (broadcastHighDegreeVertices.get()) {
		joinHint = JoinHint.BROADCAST_HASH_SECOND;
	} else {
		joinHint = JoinHint.REPARTITION_HASH_SECOND;
	}

	// Vertices: left outer join on the vertex ID, then project.
	DataSet<Vertex<K, VV>> vertices = input.getVertices()
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(0)
		.equalTo(0)
		.with(new ProjectVertex<>())
		.setParallelism(parallelism)
		.name("Project low-degree vertices");

	// Edges, first pass: join on the first endpoint.
	DataSet<Edge<K, EV>> edgesAfterFirstPass = input.getEdges()
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(firstEdgeKey)
		.equalTo(0)
		.with(new ProjectEdge<>())
		.setParallelism(parallelism)
		.name("Project low-degree edges by " + firstEndpoint);

	// Edges, second pass: join the remaining edges on the opposite endpoint.
	DataSet<Edge<K, EV>> edges = edgesAfterFirstPass
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(secondEdgeKey)
		.equalTo(0)
		.with(new ProjectEdge<>())
		.setParallelism(parallelism)
		.name("Project low-degree edges by " + secondEndpoint);

	// Assemble the resulting graph.
	return Graph.fromDataSet(vertices, edges, input.getContext());
}
 
Example 3
Source File: MaximumDegree.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a new graph from {@code input} by joining its vertices and edges against the
 * set of vertices whose degree passes the {@code DegreeFilter} configured with
 * {@code maximumDegree}.
 *
 * <p>Vertices are joined once on their ID; edges are joined twice, once per endpoint.
 * All operators are run with the configured {@code parallelism}.
 *
 * @param input the graph to process
 * @return a graph assembled from the projected vertices and edges, using the context of {@code input}
 * @throws Exception declared by the signature; propagated from the underlying calls
 */
@Override
public Graph<K, VV, EV> runInternal(Graph<K, VV, EV> input)
		throws Exception {
	// The direction flag drives both the degree computation and the order of the edge join keys.
	final boolean onTarget = reduceOnTargetId.get();
	final int firstEdgeKey = onTarget ? 1 : 0;
	final int secondEdgeKey = onTarget ? 0 : 1;
	final String firstEndpoint = onTarget ? "target" : "source";
	final String secondEndpoint = onTarget ? "source" : "target";

	// (u, d(u)) for every vertex u
	DataSet<Vertex<K, LongValue>> vertexDegree = input.run(
		new VertexDegree<K, VV, EV>()
			.setReduceOnTargetId(onTarget)
			.setParallelism(parallelism));

	// IDs of the vertices emitted by the degree filter
	DataSet<Tuple1<K>> highDegreeVertices = vertexDegree
		.flatMap(new DegreeFilter<>(maximumDegree))
		.setParallelism(parallelism)
		.name("Filter high-degree vertices");

	// Choose how the high-degree vertex set is shipped to the joins.
	final JoinHint joinHint;
	if (broadcastHighDegreeVertices.get()) {
		joinHint = JoinHint.BROADCAST_HASH_SECOND;
	} else {
		joinHint = JoinHint.REPARTITION_HASH_SECOND;
	}

	// Vertices: left outer join on the vertex ID, then project.
	DataSet<Vertex<K, VV>> vertices = input.getVertices()
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(0)
		.equalTo(0)
		.with(new ProjectVertex<>())
		.setParallelism(parallelism)
		.name("Project low-degree vertices");

	// Edges, first pass: join on the first endpoint.
	DataSet<Edge<K, EV>> edgesAfterFirstPass = input.getEdges()
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(firstEdgeKey)
		.equalTo(0)
		.with(new ProjectEdge<>())
		.setParallelism(parallelism)
		.name("Project low-degree edges by " + firstEndpoint);

	// Edges, second pass: join the remaining edges on the opposite endpoint.
	DataSet<Edge<K, EV>> edges = edgesAfterFirstPass
		.leftOuterJoin(highDegreeVertices, joinHint)
		.where(secondEdgeKey)
		.equalTo(0)
		.with(new ProjectEdge<>())
		.setParallelism(parallelism)
		.name("Project low-degree edges by " + secondEndpoint);

	// Assemble the resulting graph.
	return Graph.fromDataSet(vertices, edges, input.getContext());
}
 
Example 4
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0 2 votes vote down vote up
/**
 * Starts the definition of a Join transformation against a much smaller DataSet.
 *
 * <p>A Join transformation combines the elements of two {@link DataSet DataSets} on key
 *   equality and offers several ways to merge the joined elements into one DataSet.
 *
 * <p>The join is created with {@link JoinHint#BROADCAST_HASH_SECOND}, which hints to the
 *   optimizer that the second DataSet to join is much smaller than the first one.
 *
 * <p>The join key of the first (i.e., this) DataSet must be defined next by calling
 *   {@link JoinOperatorSets#where(String...)} on the returned {@link JoinOperatorSets}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSets to continue the definition of the Join transformation.
 *
 * @see JoinOperatorSets
 * @see DataSet
 */
public <R> JoinOperatorSets<T, R> joinWithTiny(DataSet<R> other) {
	final JoinHint tinySecondInputHint = JoinHint.BROADCAST_HASH_SECOND;
	return new JoinOperatorSets<T, R>(this, other, tinySecondInputHint);
}
 
Example 5
Source File: DataSet.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Starts the definition of a Join transformation against a much smaller DataSet.
 *
 * <p>A Join transformation combines the elements of two {@link DataSet DataSets} on key
 *   equality and offers several ways to merge the joined elements into one DataSet.
 *
 * <p>The join is created with {@link JoinHint#BROADCAST_HASH_SECOND}, which hints to the
 *   optimizer that the second DataSet to join is much smaller than the first one.
 *
 * <p>The join key of the first (i.e., this) DataSet must be defined next by calling
 *   {@link JoinOperatorSets#where(String...)} on the returned {@link JoinOperatorSets}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSets to continue the definition of the Join transformation.
 *
 * @see JoinOperatorSets
 * @see DataSet
 */
public <R> JoinOperatorSets<T, R> joinWithTiny(DataSet<R> other) {
	final JoinHint tinySecondInputHint = JoinHint.BROADCAST_HASH_SECOND;
	return new JoinOperatorSets<T, R>(this, other, tinySecondInputHint);
}
 
Example 6
Source File: DataSet.java    From flink with Apache License 2.0 2 votes vote down vote up
/**
 * Starts the definition of a Join transformation against a much smaller DataSet.
 *
 * <p>A Join transformation combines the elements of two {@link DataSet DataSets} on key
 *   equality and offers several ways to merge the joined elements into one DataSet.
 *
 * <p>The join is created with {@link JoinHint#BROADCAST_HASH_SECOND}, which hints to the
 *   optimizer that the second DataSet to join is much smaller than the first one.
 *
 * <p>The join key of the first (i.e., this) DataSet must be defined next by calling
 *   {@link JoinOperatorSets#where(String...)} on the returned {@link JoinOperatorSets}.
 *
 * @param other The other DataSet with which this DataSet is joined.
 * @return A JoinOperatorSets to continue the definition of the Join transformation.
 *
 * @see JoinOperatorSets
 * @see DataSet
 */
public <R> JoinOperatorSets<T, R> joinWithTiny(DataSet<R> other) {
	final JoinHint tinySecondInputHint = JoinHint.BROADCAST_HASH_SECOND;
	return new JoinOperatorSets<T, R>(this, other, tinySecondInputHint);
}