org.apache.flink.api.java.operators.JoinOperator Java Examples

The following examples show how to use org.apache.flink.api.java.operators.JoinOperator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: InputFormatExample.java    From flink-simple-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Loads an item data set and an info data set through {@code getSource}, joins them on
 * their {@code id} field, and prints one formatted line per matched pair.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if building or running the job fails
 */
public static void main(String[] args) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    // Item data set: "item.csv" with the field names "id" and "price".
    DataSet<Item> items = getSource(env, "item.csv", new String[]{"id", "price"}, Item.class);

    // Info data set: "info.csv" with the field names "id", "color" and "country".
    DataSet<Info> infos = getSource(env, "info.csv", new String[]{"id", "color", "country"}, Info.class);

    // Join function that turns each matched (item, info) pair into a display string.
    JoinFunction<Item, Info, String> describePair = new JoinFunction<Item, Info, String>() {
        @Override
        public String join(Item item, Info info) throws Exception {
            return "商品ID:" + item.getId() + " 价格:"+item.getPrice() + " 颜色:"+ info.getColor() + " 国家:" + info.getCountry();
        }
    };

    // Join the two data sets on "id", apply the join function, and print the result.
    JoinOperator.DefaultJoin<Item, Info> joined = items.join(infos).where("id").equalTo("id");
    joined.with(describePair).print();
}
 
Example #2
Source File: BroadcastBranchingITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a branching plan in which the first join feeds both a second join and a
 * broadcast set, collects the output, and compares it with {@code RESULT}.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	// Source 1: parameters (id, a, b, c) of second degree polynomials P(x) = ax^2 + bx + c
	DataSet<Tuple4<String, Integer, Integer, Integer>> polynomials = env.fromElements(
			new Tuple4<>("1", 61, 6, 29),
			new Tuple4<>("2", 7, 13, 10),
			new Tuple4<>("3", 8, 13, 27));

	// Source 2: x values to be evaluated with the polynomial identified by id
	DataSet<Tuple2<String, Integer>> xValues = env.fromElements(
			new Tuple2<>("1", 5),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 6));

	// Source 3: y values to be evaluated with the polynomial identified by id
	DataSet<Tuple2<String, Integer>> yValues = env.fromElements(
			new Tuple2<>("1", 2),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 7));

	// First join: matches x and y values on id and emits (id, x, y) triples
	JoinOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>> xyTriples =
			xValues.join(yValues)
					.where(0)
					.equalTo(0)
					.with(new Jn1());

	// Second join: matches polynomial and arguments by id, computes p = min(P(x),P(y)), emits (id, p)
	JoinOperator<Tuple3<String, Integer, Integer>, Tuple4<String, Integer, Integer, Integer>, Tuple2<String, Integer>> minima =
			xyTriples.join(polynomials)
					.where(0)
					.equalTo(0)
					.with(new Jn2());

	// First mapper: selects (id, x, y) triples where x = y; its output z (=x=y) is broadcast below
	FlatMapOperator<Tuple3<String, Integer, Integer>, Tuple2<String, Integer>> broadcastZ =
			xyTriples.flatMap(new Mp1());

	// Second mapper: filters out all p values which can be divided by z
	DataSet<Tuple2<String, Integer>> filtered = minima.flatMap(new Mp2()).withBroadcastSet(broadcastZ, "z");
	List<Tuple2<String, Integer>> actual = filtered.collect();

	JavaProgramTestBase.compareResultAsText(actual, RESULT);
}
 
Example #3
Source File: WorksetIterationsRecordApiCompilerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles a delta-iteration test plan made of two joins, a group reducer for the next
 * workset, and optionally an identity mapper in front of the solution set delta.
 *
 * @param joinPreservesSolutionSet when true, the join with the solution set additionally
 *        declares all fields of its first input as forwarded
 * @param mapBeforeSolutionDelta when true, the solution set delta is the output of an
 *        identity mapper over the second join instead of the join itself
 * @return the program plan created by the execution environment
 */
private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// Three named CSV sources, all reading the same path.
	DataSet<Tuple2<Long, Long>> solutionSetSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
	DataSet<Tuple2<Long, Long>> worksetSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
	DataSet<Tuple2<Long, Long>> invariantSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			solutionSetSource.iterateDelta(worksetSource, 100, 0).name(ITERATION_NAME);

	// Join of the workset with the invariant input.
	DataSet<Tuple2<Long, Long>> invariantJoin = iteration.getWorkset()
			.join(invariantSource)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.withForwardedFieldsFirst("*")
			.name(JOIN_WITH_INVARIANT_NAME);

	// Join of the solution set with the result of the first join.
	DataSet<Tuple2<Long, Long>> solutionJoin = iteration.getSolutionSet()
			.join(invariantJoin)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.name(JOIN_WITH_SOLUTION_SET);
	if (joinPreservesSolutionSet) {
		((JoinOperator<?, ?, ?>) solutionJoin).withForwardedFieldsFirst("*");
	}

	DataSet<Tuple2<Long, Long>> nextWorkset = solutionJoin
			.groupBy(0)
			.reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
			.withForwardedFields("*")
			.name(NEXT_WORKSET_REDUCER_NAME);

	// The solution set delta is either the second join itself or an identity map over it.
	DataSet<Tuple2<Long, Long>> solutionDelta = solutionJoin;
	if (mapBeforeSolutionDelta) {
		solutionDelta = solutionJoin
				.map(new IdentityMapper<Tuple2<Long, Long>>())
				.withForwardedFields("*")
				.name(SOLUTION_DELTA_MAPPER_NAME);
	}

	iteration.closeWith(solutionDelta, nextWorkset)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	return env.createProgramPlan();
}
 
Example #4
Source File: JoinOperatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the forwarded-field semantic properties of a key-selector join whose output is
 * assembled from chained {@code projectFirst}/{@code projectSecond} calls.
 */
@Test
public void testSemanticPropsWithKeySelector3() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// Both inputs are created from emptyTupleData with tupleTypeInfo.
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	// Projection order: first(2), second(0, 0, 3), first(0, 4), second(2).
	JoinOperator<?, ?, ? extends Tuple> joinOp = tupleDs1.join(tupleDs2)
			.where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
			.projectFirst(2)
			.projectSecond(0, 0, 3)
			.projectFirst(0, 4)
			.projectSecond(2);

	SemanticProperties semProps = joinOp.getSemanticProperties();

	// Forwarding targets for source fields 0..6 of input 0.
	// NOTE(review): the asserted source indexes look shifted by two relative to the projected
	// tuple positions (e.g. projectFirst(2) is asserted as source field 4 -> target 0) —
	// presumably because of the key selector; confirm against DummyTestKeySelector.
	assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 4).contains(0));
	assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 6).contains(5));

	// Forwarding targets for source fields 0..6 of input 1; source field 2 maps to two targets.
	assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(1));
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(2));
	assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 4).contains(6));
	assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 5).contains(3));
	assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 0);

}
 
Example #5
Source File: BroadcastBranchingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a branching plan in which the first join feeds both a second join and a
 * broadcast set, collects the output, and compares it with {@code RESULT}.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	// Source 1: parameters (id, a, b, c) of second degree polynomials P(x) = ax^2 + bx + c
	DataSet<Tuple4<String, Integer, Integer, Integer>> polynomials = env.fromElements(
			new Tuple4<>("1", 61, 6, 29),
			new Tuple4<>("2", 7, 13, 10),
			new Tuple4<>("3", 8, 13, 27));

	// Source 2: x values to be evaluated with the polynomial identified by id
	DataSet<Tuple2<String, Integer>> xValues = env.fromElements(
			new Tuple2<>("1", 5),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 6));

	// Source 3: y values to be evaluated with the polynomial identified by id
	DataSet<Tuple2<String, Integer>> yValues = env.fromElements(
			new Tuple2<>("1", 2),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 7));

	// First join: matches x and y values on id and emits (id, x, y) triples
	JoinOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>> xyTriples =
			xValues.join(yValues)
					.where(0)
					.equalTo(0)
					.with(new Jn1());

	// Second join: matches polynomial and arguments by id, computes p = min(P(x),P(y)), emits (id, p)
	JoinOperator<Tuple3<String, Integer, Integer>, Tuple4<String, Integer, Integer, Integer>, Tuple2<String, Integer>> minima =
			xyTriples.join(polynomials)
					.where(0)
					.equalTo(0)
					.with(new Jn2());

	// First mapper: selects (id, x, y) triples where x = y; its output z (=x=y) is broadcast below
	FlatMapOperator<Tuple3<String, Integer, Integer>, Tuple2<String, Integer>> broadcastZ =
			xyTriples.flatMap(new Mp1());

	// Second mapper: filters out all p values which can be divided by z
	DataSet<Tuple2<String, Integer>> filtered = minima.flatMap(new Mp2()).withBroadcastSet(broadcastZ, "z");
	List<Tuple2<String, Integer>> actual = filtered.collect();

	JavaProgramTestBase.compareResultAsText(actual, RESULT);
}
 
Example #6
Source File: CoGroupConnectedComponentsITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and executes a delta-iteration job named "Workset Connected Components": vertices are
 * paired with their own id, candidate ids are joined with the edges, and a co-group with the
 * solution set produces both the solution set delta and the next workset.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// Turns each single-field vertex tuple into a pair holding the same value twice.
	MapFunction<Tuple1<Long>, Tuple2<Long, Long>> duplicateId = new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
		@Override
		public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
			return new Tuple2<>(value.f0, value.f0);
		}
	};
	DataSet<Tuple2<Long, Long>> labelledVertices = vertexIds.map(duplicateId).name("Assign Vertex Ids");

	// The labelled vertices serve as both the initial solution set and the initial workset.
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			labelledVertices.iterateDelta(labelledVertices, MAX_ITERATIONS, 0);

	// Emits (second field of the edge, second field of the workset element) for each match.
	JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> toNeighbor =
			new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
					return new Tuple2<>(second.f1, first.f1);
				}
			};
	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates =
			deltaIteration.getWorkset()
					.join(edgePairs)
					.where(0)
					.equalTo(0)
					.with(toNeighbor)
					.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates =
			candidates.coGroup(deltaIteration.getSolutionSet())
					.where(0)
					.equalTo(0)
					.with(new MinIdAndUpdate())
					.name("min Id and Update");

	// The co-group output is used as solution set delta and as next workset.
	DataSet<Tuple2<Long, Long>> components = deltaIteration.closeWith(updates, updates);
	components.writeAsCsv(resultPath, "\n", " ").name("Result");

	env.execute("Workset Connected Components");
}
 
Example #7
Source File: WorksetIterationsRecordApiCompilerTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles a delta-iteration test plan made of two joins, a group reducer for the next
 * workset, and optionally an identity mapper in front of the solution set delta.
 *
 * @param joinPreservesSolutionSet when true, the join with the solution set additionally
 *        declares all fields of its first input as forwarded
 * @param mapBeforeSolutionDelta when true, the solution set delta is the output of an
 *        identity mapper over the second join instead of the join itself
 * @return the program plan created by the execution environment
 */
private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// Three named CSV sources, all reading the same path.
	DataSet<Tuple2<Long, Long>> solutionSetSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
	DataSet<Tuple2<Long, Long>> worksetSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
	DataSet<Tuple2<Long, Long>> invariantSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			solutionSetSource.iterateDelta(worksetSource, 100, 0).name(ITERATION_NAME);

	// Join of the workset with the invariant input.
	DataSet<Tuple2<Long, Long>> invariantJoin = iteration.getWorkset()
			.join(invariantSource)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.withForwardedFieldsFirst("*")
			.name(JOIN_WITH_INVARIANT_NAME);

	// Join of the solution set with the result of the first join.
	DataSet<Tuple2<Long, Long>> solutionJoin = iteration.getSolutionSet()
			.join(invariantJoin)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.name(JOIN_WITH_SOLUTION_SET);
	if (joinPreservesSolutionSet) {
		((JoinOperator<?, ?, ?>) solutionJoin).withForwardedFieldsFirst("*");
	}

	DataSet<Tuple2<Long, Long>> nextWorkset = solutionJoin
			.groupBy(0)
			.reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
			.withForwardedFields("*")
			.name(NEXT_WORKSET_REDUCER_NAME);

	// The solution set delta is either the second join itself or an identity map over it.
	DataSet<Tuple2<Long, Long>> solutionDelta = solutionJoin;
	if (mapBeforeSolutionDelta) {
		solutionDelta = solutionJoin
				.map(new IdentityMapper<Tuple2<Long, Long>>())
				.withForwardedFields("*")
				.name(SOLUTION_DELTA_MAPPER_NAME);
	}

	iteration.closeWith(solutionDelta, nextWorkset)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	return env.createProgramPlan();
}
 
Example #8
Source File: JoinOperatorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the forwarded-field semantic properties of a key-selector join whose output is
 * assembled from chained {@code projectFirst}/{@code projectSecond} calls.
 */
@Test
public void testSemanticPropsWithKeySelector3() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// Both inputs are created from emptyTupleData with tupleTypeInfo.
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	// Projection order: first(2), second(0, 0, 3), first(0, 4), second(2).
	JoinOperator<?, ?, ? extends Tuple> joinOp = tupleDs1.join(tupleDs2)
			.where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
			.projectFirst(2)
			.projectSecond(0, 0, 3)
			.projectFirst(0, 4)
			.projectSecond(2);

	SemanticProperties semProps = joinOp.getSemanticProperties();

	// Forwarding targets for source fields 0..6 of input 0.
	// NOTE(review): the asserted source indexes look shifted by two relative to the projected
	// tuple positions (e.g. projectFirst(2) is asserted as source field 4 -> target 0) —
	// presumably because of the key selector; confirm against DummyTestKeySelector.
	assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 4).contains(0));
	assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 6).contains(5));

	// Forwarding targets for source fields 0..6 of input 1; source field 2 maps to two targets.
	assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(1));
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(2));
	assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 4).contains(6));
	assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 5).contains(3));
	assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 0);

}
 
Example #9
Source File: BroadcastBranchingITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a branching plan in which the first join feeds both a second join and a
 * broadcast set, collects the output, and compares it with {@code RESULT}.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	// Source 1: parameters (id, a, b, c) of second degree polynomials P(x) = ax^2 + bx + c
	DataSet<Tuple4<String, Integer, Integer, Integer>> polynomials = env.fromElements(
			new Tuple4<>("1", 61, 6, 29),
			new Tuple4<>("2", 7, 13, 10),
			new Tuple4<>("3", 8, 13, 27));

	// Source 2: x values to be evaluated with the polynomial identified by id
	DataSet<Tuple2<String, Integer>> xValues = env.fromElements(
			new Tuple2<>("1", 5),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 6));

	// Source 3: y values to be evaluated with the polynomial identified by id
	DataSet<Tuple2<String, Integer>> yValues = env.fromElements(
			new Tuple2<>("1", 2),
			new Tuple2<>("2", 3),
			new Tuple2<>("3", 7));

	// First join: matches x and y values on id and emits (id, x, y) triples
	JoinOperator<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>> xyTriples =
			xValues.join(yValues)
					.where(0)
					.equalTo(0)
					.with(new Jn1());

	// Second join: matches polynomial and arguments by id, computes p = min(P(x),P(y)), emits (id, p)
	JoinOperator<Tuple3<String, Integer, Integer>, Tuple4<String, Integer, Integer, Integer>, Tuple2<String, Integer>> minima =
			xyTriples.join(polynomials)
					.where(0)
					.equalTo(0)
					.with(new Jn2());

	// First mapper: selects (id, x, y) triples where x = y; its output z (=x=y) is broadcast below
	FlatMapOperator<Tuple3<String, Integer, Integer>, Tuple2<String, Integer>> broadcastZ =
			xyTriples.flatMap(new Mp1());

	// Second mapper: filters out all p values which can be divided by z
	DataSet<Tuple2<String, Integer>> filtered = minima.flatMap(new Mp2()).withBroadcastSet(broadcastZ, "z");
	List<Tuple2<String, Integer>> actual = filtered.collect();

	JavaProgramTestBase.compareResultAsText(actual, RESULT);
}
 
Example #10
Source File: CoGroupConnectedComponentsITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and executes a delta-iteration job named "Workset Connected Components": vertices are
 * paired with their own id, candidate ids are joined with the edges, and a co-group with the
 * solution set produces both the solution set delta and the next workset.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// Turns each single-field vertex tuple into a pair holding the same value twice.
	MapFunction<Tuple1<Long>, Tuple2<Long, Long>> duplicateId = new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
		@Override
		public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
			return new Tuple2<>(value.f0, value.f0);
		}
	};
	DataSet<Tuple2<Long, Long>> labelledVertices = vertexIds.map(duplicateId).name("Assign Vertex Ids");

	// The labelled vertices serve as both the initial solution set and the initial workset.
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			labelledVertices.iterateDelta(labelledVertices, MAX_ITERATIONS, 0);

	// Emits (second field of the edge, second field of the workset element) for each match.
	JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> toNeighbor =
			new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
					return new Tuple2<>(second.f1, first.f1);
				}
			};
	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates =
			deltaIteration.getWorkset()
					.join(edgePairs)
					.where(0)
					.equalTo(0)
					.with(toNeighbor)
					.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates =
			candidates.coGroup(deltaIteration.getSolutionSet())
					.where(0)
					.equalTo(0)
					.with(new MinIdAndUpdate())
					.name("min Id and Update");

	// The co-group output is used as solution set delta and as next workset.
	DataSet<Tuple2<Long, Long>> components = deltaIteration.closeWith(updates, updates);
	components.writeAsCsv(resultPath, "\n", " ").name("Result");

	env.execute("Workset Connected Components");
}
 
Example #11
Source File: WorksetIterationsRecordApiCompilerTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles a delta-iteration test plan made of two joins, a group reducer for the next
 * workset, and optionally an identity mapper in front of the solution set delta.
 *
 * @param joinPreservesSolutionSet when true, the join with the solution set additionally
 *        declares all fields of its first input as forwarded
 * @param mapBeforeSolutionDelta when true, the solution set delta is the output of an
 *        identity mapper over the second join instead of the join itself
 * @return the program plan created by the execution environment
 */
private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// Three named CSV sources, all reading the same path.
	DataSet<Tuple2<Long, Long>> solutionSetSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
	DataSet<Tuple2<Long, Long>> worksetSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
	DataSet<Tuple2<Long, Long>> invariantSource =
			env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			solutionSetSource.iterateDelta(worksetSource, 100, 0).name(ITERATION_NAME);

	// Join of the workset with the invariant input.
	DataSet<Tuple2<Long, Long>> invariantJoin = iteration.getWorkset()
			.join(invariantSource)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.withForwardedFieldsFirst("*")
			.name(JOIN_WITH_INVARIANT_NAME);

	// Join of the solution set with the result of the first join.
	DataSet<Tuple2<Long, Long>> solutionJoin = iteration.getSolutionSet()
			.join(invariantJoin)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.name(JOIN_WITH_SOLUTION_SET);
	if (joinPreservesSolutionSet) {
		((JoinOperator<?, ?, ?>) solutionJoin).withForwardedFieldsFirst("*");
	}

	DataSet<Tuple2<Long, Long>> nextWorkset = solutionJoin
			.groupBy(0)
			.reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
			.withForwardedFields("*")
			.name(NEXT_WORKSET_REDUCER_NAME);

	// The solution set delta is either the second join itself or an identity map over it.
	DataSet<Tuple2<Long, Long>> solutionDelta = solutionJoin;
	if (mapBeforeSolutionDelta) {
		solutionDelta = solutionJoin
				.map(new IdentityMapper<Tuple2<Long, Long>>())
				.withForwardedFields("*")
				.name(SOLUTION_DELTA_MAPPER_NAME);
	}

	iteration.closeWith(solutionDelta, nextWorkset)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	return env.createProgramPlan();
}
 
Example #12
Source File: JoinOperatorTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the forwarded-field semantic properties of a key-selector join whose output is
 * assembled from chained {@code projectFirst}/{@code projectSecond} calls.
 */
@Test
public void testSemanticPropsWithKeySelector3() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// Both inputs are created from emptyTupleData with tupleTypeInfo.
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	// Projection order: first(2), second(0, 0, 3), first(0, 4), second(2).
	JoinOperator<?, ?, ? extends Tuple> joinOp = tupleDs1.join(tupleDs2)
			.where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
			.projectFirst(2)
			.projectSecond(0, 0, 3)
			.projectFirst(0, 4)
			.projectSecond(2);

	SemanticProperties semProps = joinOp.getSemanticProperties();

	// Forwarding targets for source fields 0..6 of input 0.
	// NOTE(review): the asserted source indexes look shifted by two relative to the projected
	// tuple positions (e.g. projectFirst(2) is asserted as source field 4 -> target 0) —
	// presumably because of the key selector; confirm against DummyTestKeySelector.
	assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 4).contains(0));
	assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 6).contains(5));

	// Forwarding targets for source fields 0..6 of input 1; source field 2 maps to two targets.
	assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(1));
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(2));
	assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 4).contains(6));
	assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 5).contains(3));
	assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 0);

}
 
Example #13
Source File: CoGroupConnectedComponentsITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and executes a delta-iteration job named "Workset Connected Components": vertices are
 * paired with their own id, candidate ids are joined with the edges, and a co-group with the
 * solution set produces both the solution set delta and the next workset.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// Turns each single-field vertex tuple into a pair holding the same value twice.
	MapFunction<Tuple1<Long>, Tuple2<Long, Long>> duplicateId = new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
		@Override
		public Tuple2<Long, Long> map(Tuple1<Long> value) throws Exception {
			return new Tuple2<>(value.f0, value.f0);
		}
	};
	DataSet<Tuple2<Long, Long>> labelledVertices = vertexIds.map(duplicateId).name("Assign Vertex Ids");

	// The labelled vertices serve as both the initial solution set and the initial workset.
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			labelledVertices.iterateDelta(labelledVertices, MAX_ITERATIONS, 0);

	// Emits (second field of the edge, second field of the workset element) for each match.
	JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> toNeighbor =
			new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> join(Tuple2<Long, Long> first, Tuple2<Long, Long> second) throws Exception {
					return new Tuple2<>(second.f1, first.f1);
				}
			};
	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates =
			deltaIteration.getWorkset()
					.join(edgePairs)
					.where(0)
					.equalTo(0)
					.with(toNeighbor)
					.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates =
			candidates.coGroup(deltaIteration.getSolutionSet())
					.where(0)
					.equalTo(0)
					.with(new MinIdAndUpdate())
					.name("min Id and Update");

	// The co-group output is used as solution set delta and as next workset.
	DataSet<Tuple2<Long, Long>> components = deltaIteration.closeWith(updates, updates);
	components.writeAsCsv(resultPath, "\n", " ").name("Result");

	env.execute("Workset Connected Components");
}
 
Example #14
Source File: JoinOperatorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the forwarded-field and read-field semantic properties reported for a key-selector
 * join that uses {@code DummyTestJoinFunction1} and sets no forwarded fields on the operator.
 */
@Test
public void testSemanticPropsWithKeySelector1() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// Both inputs are created from emptyTupleData with tupleTypeInfo.
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	// NOTE(review): the expected properties presumably come from annotations on
	// DummyTestJoinFunction1, which is not visible here — confirm.
	JoinOperator<?, ?, ?> joinOp = tupleDs1.join(tupleDs2)
			.where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
			.with(new DummyTestJoinFunction1());

	SemanticProperties semProps = joinOp.getSemanticProperties();

	// Forwarding targets for source fields 0..6 of input 0; source field 3 maps to two targets.
	assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(0, 3).contains(1));
	assertTrue(semProps.getForwardingTargetFields(0, 3).contains(3));
	assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 0);

	// Forwarding targets for source fields 0..6 of input 1.
	assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 4).contains(2));
	assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 6).contains(0));

	// Read fields of input 0: exactly 2, 4 and 6.
	assertTrue(semProps.getReadFields(0).size() == 3);
	assertTrue(semProps.getReadFields(0).contains(2));
	assertTrue(semProps.getReadFields(0).contains(4));
	assertTrue(semProps.getReadFields(0).contains(6));

	// Read fields of input 1: exactly 3 and 5.
	assertTrue(semProps.getReadFields(1).size() == 2);
	assertTrue(semProps.getReadFields(1).contains(3));
	assertTrue(semProps.getReadFields(1).contains(5));
}
 
Example #15
Source File: JoinOperatorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the semantic properties of a key-selector join whose forwarded fields are declared
 * on the operator through {@code withForwardedFieldsFirst} and {@code withForwardedFieldsSecond}.
 */
@Test
public void testSemanticPropsWithKeySelector2() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// Both inputs are created from emptyTupleData with tupleTypeInfo.
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	// Forwarded fields are declared as "2;4->0" for the first input and "0->4;1;1->3" for the second.
	JoinOperator<?, ?, ?> joinOp = tupleDs1.join(tupleDs2)
			.where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
			.with(new DummyTestJoinFunction2())
				.withForwardedFieldsFirst("2;4->0")
				.withForwardedFieldsSecond("0->4;1;1->3");

	SemanticProperties semProps = joinOp.getSemanticProperties();

	// Forwarding targets for source fields 0..6 of input 0.
	// NOTE(review): the asserted source indexes look shifted by two relative to the declared
	// ones (declared "2" is asserted as 4 -> 2, declared "4->0" as 6 -> 0) — presumably
	// because of the key selector; confirm against DummyTestKeySelector.
	assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
	assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 6).contains(0));

	// Forwarding targets for source fields 0..6 of input 1; source field 3 maps to two targets.
	assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(1, 3).contains(1));
	assertTrue(semProps.getForwardingTargetFields(1, 3).contains(3));
	assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 0);

	// Read fields of input 0: exactly 2, 3 and 4.
	assertTrue(semProps.getReadFields(0).size() == 3);
	assertTrue(semProps.getReadFields(0).contains(2));
	assertTrue(semProps.getReadFields(0).contains(3));
	assertTrue(semProps.getReadFields(0).contains(4));

	// No read-field set is expected for input 1.
	assertTrue(semProps.getReadFields(1) == null);
}
 
Example #16
Source File: JoinOperatorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the semantic properties of a key-selector join whose forwarded fields are declared
 * on the operator through {@code withForwardedFieldsFirst} and {@code withForwardedFieldsSecond}.
 */
@Test
public void testSemanticPropsWithKeySelector2() {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	// Both inputs are created from emptyTupleData with tupleTypeInfo.
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs2 = env.fromCollection(emptyTupleData, tupleTypeInfo);

	// Forwarded fields are declared as "2;4->0" for the first input and "0->4;1;1->3" for the second.
	JoinOperator<?, ?, ?> joinOp = tupleDs1.join(tupleDs2)
			.where(new DummyTestKeySelector()).equalTo(new DummyTestKeySelector())
			.with(new DummyTestJoinFunction2())
				.withForwardedFieldsFirst("2;4->0")
				.withForwardedFieldsSecond("0->4;1;1->3");

	SemanticProperties semProps = joinOp.getSemanticProperties();

	// Forwarding targets for source fields 0..6 of input 0.
	// NOTE(review): the asserted source indexes look shifted by two relative to the declared
	// ones (declared "2" is asserted as 4 -> 2, declared "4->0" as 6 -> 0) — presumably
	// because of the key selector; confirm against DummyTestKeySelector.
	assertTrue(semProps.getForwardingTargetFields(0, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 2).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 3).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 4).contains(2));
	assertTrue(semProps.getForwardingTargetFields(0, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(0, 6).contains(0));

	// Forwarding targets for source fields 0..6 of input 1; source field 3 maps to two targets.
	assertTrue(semProps.getForwardingTargetFields(1, 0).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 1).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 2).size() == 1);
	assertTrue(semProps.getForwardingTargetFields(1, 2).contains(4));
	assertTrue(semProps.getForwardingTargetFields(1, 3).size() == 2);
	assertTrue(semProps.getForwardingTargetFields(1, 3).contains(1));
	assertTrue(semProps.getForwardingTargetFields(1, 3).contains(3));
	assertTrue(semProps.getForwardingTargetFields(1, 4).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 5).size() == 0);
	assertTrue(semProps.getForwardingTargetFields(1, 6).size() == 0);

	// Read fields of input 0: exactly 2, 3 and 4.
	assertTrue(semProps.getReadFields(0).size() == 3);
	assertTrue(semProps.getReadFields(0).contains(2));
	assertTrue(semProps.getReadFields(0).contains(3));
	assertTrue(semProps.getReadFields(0).contains(4));

	// No read-field set is expected for input 1.
	assertTrue(semProps.getReadFields(1) == null);
}
 
Example #17
Source File: JoinOperatorTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies the semantic properties exposed by a join of two tuple data sets that is keyed
 * with {@code DummyTestKeySelector} on both sides and uses {@code DummyTestJoinFunction1}.
 *
 * <p>No forwarded fields are declared on the call chain here, so the expected values are
 * presumably driven by annotations on {@code DummyTestJoinFunction1} (not visible from this
 * method; confirm there). Expected result per the assertions:
 * input 0 forwards 2 -> {4} and 3 -> {1, 3}; input 1 forwards 4 -> {2} and 6 -> {0};
 * read fields are {2, 4, 6} for input 0 and {3, 5} for input 1.
 */
@Test
public void testSemanticPropsWithKeySelector1() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> left =
			env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> right =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	JoinOperator<?, ?, ?> join = left
			.join(right)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestJoinFunction1());

	SemanticProperties props = join.getSemanticProperties();

	// Input 0: source fields 0 and 1 have no forwarding targets.
	for (int src = 0; src <= 1; src++) {
		assertTrue(props.getForwardingTargetFields(0, src).size() == 0);
	}
	// Input 0: source field 2 is forwarded to target 4 only.
	assertTrue(props.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 2).contains(4));
	// Input 0: source field 3 is forwarded to targets 1 and 3.
	assertTrue(props.getForwardingTargetFields(0, 3).size() == 2);
	for (int target : new int[] {1, 3}) {
		assertTrue(props.getForwardingTargetFields(0, 3).contains(target));
	}
	// Input 0: source fields 4 through 6 have no forwarding targets.
	for (int src = 4; src <= 6; src++) {
		assertTrue(props.getForwardingTargetFields(0, src).size() == 0);
	}

	// Input 1: source fields 0 through 3 have no forwarding targets.
	for (int src = 0; src <= 3; src++) {
		assertTrue(props.getForwardingTargetFields(1, src).size() == 0);
	}
	// Input 1: source field 4 is forwarded to target 2 only.
	assertTrue(props.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 4).contains(2));
	assertTrue(props.getForwardingTargetFields(1, 5).size() == 0);
	// Input 1: source field 6 is forwarded to target 0 only.
	assertTrue(props.getForwardingTargetFields(1, 6).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 6).contains(0));

	// Read set of input 0 is exactly {2, 4, 6}.
	assertTrue(props.getReadFields(0).size() == 3);
	for (int field : new int[] {2, 4, 6}) {
		assertTrue(props.getReadFields(0).contains(field));
	}

	// Read set of input 1 is exactly {3, 5}.
	assertTrue(props.getReadFields(1).size() == 2);
	for (int field : new int[] {3, 5}) {
		assertTrue(props.getReadFields(1).contains(field));
	}
}
 
Example #18
Source File: JoinOperatorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins two tuple data sets using {@code DummyTestKeySelector} keys on both inputs and
 * {@code DummyTestJoinFunction1}, then checks the operator's semantic properties.
 *
 * <p>The call chain declares no forwarded fields, so the expectations presumably come from
 * annotations on {@code DummyTestJoinFunction1} (defined outside this method; confirm there).
 * The assertions pin: input 0 forwarding 2 -> {4} and 3 -> {1, 3}; input 1 forwarding
 * 4 -> {2} and 6 -> {0}; read fields {2, 4, 6} on input 0 and {3, 5} on input 1.
 */
@Test
public void testSemanticPropsWithKeySelector1() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> firstInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> secondInput =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	JoinOperator<?, ?, ?> joined = firstInput
			.join(secondInput)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestJoinFunction1());

	SemanticProperties semantics = joined.getSemanticProperties();

	// First input: fields 0 and 1 are not forwarded anywhere.
	for (int source = 0; source <= 1; source++) {
		assertTrue(semantics.getForwardingTargetFields(0, source).size() == 0);
	}
	// First input: field 2 goes to exactly one target, field 4.
	assertTrue(semantics.getForwardingTargetFields(0, 2).size() == 1);
	assertTrue(semantics.getForwardingTargetFields(0, 2).contains(4));
	// First input: field 3 goes to exactly two targets, fields 1 and 3.
	assertTrue(semantics.getForwardingTargetFields(0, 3).size() == 2);
	for (int target : new int[] {1, 3}) {
		assertTrue(semantics.getForwardingTargetFields(0, 3).contains(target));
	}
	// First input: fields 4 through 6 are not forwarded anywhere.
	for (int source = 4; source <= 6; source++) {
		assertTrue(semantics.getForwardingTargetFields(0, source).size() == 0);
	}

	// Second input: fields 0 through 3 are not forwarded anywhere.
	for (int source = 0; source <= 3; source++) {
		assertTrue(semantics.getForwardingTargetFields(1, source).size() == 0);
	}
	// Second input: field 4 goes to exactly one target, field 2.
	assertTrue(semantics.getForwardingTargetFields(1, 4).size() == 1);
	assertTrue(semantics.getForwardingTargetFields(1, 4).contains(2));
	assertTrue(semantics.getForwardingTargetFields(1, 5).size() == 0);
	// Second input: field 6 goes to exactly one target, field 0.
	assertTrue(semantics.getForwardingTargetFields(1, 6).size() == 1);
	assertTrue(semantics.getForwardingTargetFields(1, 6).contains(0));

	// First input reads exactly fields 2, 4 and 6.
	assertTrue(semantics.getReadFields(0).size() == 3);
	for (int field : new int[] {2, 4, 6}) {
		assertTrue(semantics.getReadFields(0).contains(field));
	}

	// Second input reads exactly fields 3 and 5.
	assertTrue(semantics.getReadFields(1).size() == 2);
	for (int field : new int[] {3, 5}) {
		assertTrue(semantics.getReadFields(1).contains(field));
	}
}
 
Example #19
Source File: JoinOperatorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies the semantic properties of a key-selector join whose forwarded fields are declared
 * on the call chain: {@code "2;4->0"} for the first input and {@code "0->4;1;1->3"} for the
 * second, with {@code DummyTestJoinFunction2} as the join function.
 *
 * <p>The source indices asserted below are each 2 higher than the declared ones (for example
 * declared {@code 4->0} is asserted as 6 -> {0}). NOTE(review): presumably this offset comes
 * from the key-selector wrapping of the inputs; confirm against the operator implementation.
 *
 * <p>Read fields are expected to be {2, 3, 4} for input 0 and {@code null} for input 1; these
 * are presumably declared on {@code DummyTestJoinFunction2}, which is not visible here.
 */
@Test
public void testSemanticPropsWithKeySelector2() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> left =
			env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> right =
			env.fromCollection(emptyTupleData, tupleTypeInfo);

	JoinOperator<?, ?, ?> join = left
			.join(right)
			.where(new DummyTestKeySelector())
			.equalTo(new DummyTestKeySelector())
			.with(new DummyTestJoinFunction2())
			.withForwardedFieldsFirst("2;4->0")
			.withForwardedFieldsSecond("0->4;1;1->3");

	SemanticProperties props = join.getSemanticProperties();

	// Input 0: source fields 0 through 3 have no forwarding targets.
	for (int src = 0; src <= 3; src++) {
		assertTrue(props.getForwardingTargetFields(0, src).size() == 0);
	}
	// Input 0: source field 4 is forwarded to target 2 only.
	assertTrue(props.getForwardingTargetFields(0, 4).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 4).contains(2));
	assertTrue(props.getForwardingTargetFields(0, 5).size() == 0);
	// Input 0: source field 6 is forwarded to target 0 only.
	assertTrue(props.getForwardingTargetFields(0, 6).size() == 1);
	assertTrue(props.getForwardingTargetFields(0, 6).contains(0));

	// Input 1: source fields 0 and 1 have no forwarding targets.
	for (int src = 0; src <= 1; src++) {
		assertTrue(props.getForwardingTargetFields(1, src).size() == 0);
	}
	// Input 1: source field 2 is forwarded to target 4 only.
	assertTrue(props.getForwardingTargetFields(1, 2).size() == 1);
	assertTrue(props.getForwardingTargetFields(1, 2).contains(4));
	// Input 1: source field 3 is forwarded to targets 1 and 3.
	assertTrue(props.getForwardingTargetFields(1, 3).size() == 2);
	for (int target : new int[] {1, 3}) {
		assertTrue(props.getForwardingTargetFields(1, 3).contains(target));
	}
	// Input 1: source fields 4 through 6 have no forwarding targets.
	for (int src = 4; src <= 6; src++) {
		assertTrue(props.getForwardingTargetFields(1, src).size() == 0);
	}

	// Read set of input 0 is exactly {2, 3, 4}.
	assertTrue(props.getReadFields(0).size() == 3);
	for (int field : new int[] {2, 3, 4}) {
		assertTrue(props.getReadFields(0).contains(field));
	}

	// No read-field information is reported for input 1.
	assertTrue(props.getReadFields(1) == null);
}
 
Example #20
Source File: JoinFunctionAssigner.java    From flink with Apache License 2.0 votes vote down vote up
/**
 * Accepts a {@link JoinFunction} over the two input types and returns the resulting
 * {@link JoinOperator}.
 *
 * @param joinFunction function taking an {@code I1} and an {@code I2} element and producing
 *     an {@code R}; presumably applied to each joined pair -- confirm in the implementing class
 * @param <R> result type, fixed by the third type argument of {@code joinFunction}
 * @return a join operator typed by both input types and {@code R}
 */
<R> JoinOperator<I1, I2, R> with(JoinFunction<I1, I2, R> joinFunction); 
Example #21
Source File: JoinFunctionAssigner.java    From flink with Apache License 2.0 votes vote down vote up
/**
 * Accepts a {@link FlatJoinFunction} over the two input types and returns the resulting
 * {@link JoinOperator}.
 *
 * @param joinFunction flat join function parameterized by {@code I1}, {@code I2} and
 *     {@code R}; presumably applied to each joined pair -- confirm in the implementing class
 * @param <R> result type, fixed by the third type argument of {@code joinFunction}
 * @return a join operator typed by both input types and {@code R}
 */
<R> JoinOperator<I1, I2, R> with(FlatJoinFunction<I1, I2, R> joinFunction); 
Example #22
Source File: JoinFunctionAssigner.java    From Flink-CEPplus with Apache License 2.0 votes vote down vote up
/**
 * Accepts a {@link FlatJoinFunction} over the two input types and returns the resulting
 * {@link JoinOperator}.
 *
 * @param joinFunction flat join function parameterized by {@code I1}, {@code I2} and
 *     {@code R}; presumably applied to each joined pair -- confirm in the implementing class
 * @param <R> result type, fixed by the third type argument of {@code joinFunction}
 * @return a join operator typed by both input types and {@code R}
 */
<R> JoinOperator<I1, I2, R> with(FlatJoinFunction<I1, I2, R> joinFunction); 
Example #23
Source File: JoinFunctionAssigner.java    From flink with Apache License 2.0 votes vote down vote up
/**
 * Accepts a {@link JoinFunction} over the two input types and returns the resulting
 * {@link JoinOperator}.
 *
 * @param joinFunction function taking an {@code I1} and an {@code I2} element and producing
 *     an {@code R}; presumably applied to each joined pair -- confirm in the implementing class
 * @param <R> result type, fixed by the third type argument of {@code joinFunction}
 * @return a join operator typed by both input types and {@code R}
 */
<R> JoinOperator<I1, I2, R> with(JoinFunction<I1, I2, R> joinFunction); 
Example #24
Source File: JoinFunctionAssigner.java    From flink with Apache License 2.0 votes vote down vote up
/**
 * Accepts a {@link FlatJoinFunction} over the two input types and returns the resulting
 * {@link JoinOperator}.
 *
 * @param joinFunction flat join function parameterized by {@code I1}, {@code I2} and
 *     {@code R}; presumably applied to each joined pair -- confirm in the implementing class
 * @param <R> result type, fixed by the third type argument of {@code joinFunction}
 * @return a join operator typed by both input types and {@code R}
 */
<R> JoinOperator<I1, I2, R> with(FlatJoinFunction<I1, I2, R> joinFunction); 
Example #25
Source File: JoinFunctionAssigner.java    From Flink-CEPplus with Apache License 2.0 votes vote down vote up
/**
 * Accepts a {@link JoinFunction} over the two input types and returns the resulting
 * {@link JoinOperator}.
 *
 * @param joinFunction function taking an {@code I1} and an {@code I2} element and producing
 *     an {@code R}; presumably applied to each joined pair -- confirm in the implementing class
 * @param <R> result type, fixed by the third type argument of {@code joinFunction}
 * @return a join operator typed by both input types and {@code R}
 */
<R> JoinOperator<I1, I2, R> with(JoinFunction<I1, I2, R> joinFunction);