org.apache.flink.optimizer.testfunctions.IdentityGroupReducer Java Examples

The following examples show how to use org.apache.flink.optimizer.testfunctions.IdentityGroupReducer. The examples are taken from open source projects; the source file, project, and license are noted above each example.
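IdentityGroupReducer belongs to Flink's optimizer test functions: as the name suggests, it is a group reduce function that simply forwards every record of its group, so optimizer plans can be built and compiled without changing the data. As orientation before the examples, the sketch below shows what such an identity group reducer might look like and how it is typically wired into a DataSet program; the class body and the name MyIdentityGroupReducer are illustrative assumptions, not a copy of Flink's source.

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.util.Collector;

// Assumed behavior (sketch): emit every record of the group unchanged.
public class MyIdentityGroupReducer<T> implements GroupReduceFunction<T, T> {

	@Override
	public void reduce(Iterable<T> values, Collector<T> out) {
		for (T value : values) {
			out.collect(value);
		}
	}

	// Hypothetical usage, mirroring the test plans below: group, apply the identity
	// reduce, and discard the output so only the plan structure matters.
	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Long> data = env.generateSequence(0, 100);
		data.groupBy("*")
				.reduceGroup(new MyIdentityGroupReducer<Long>())
				.output(new DiscardingOutputFormat<Long>());
		env.execute("identity group reduce sketch");
	}
}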
Example #1
Source File: ReduceAllTest.java    From flink with Apache License 2.0
@Test
public void testReduce() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();

	try {
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The pact compiler is unable to compile this plan correctly");
	}
}
 
Example #2
Source File: HardPlansCompilationTest.java    From flink with Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 * 
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.map(new IdentityMapper<Long>()).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
			.cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = compileNoStats(plan);

	JobGraphGenerator jobGen = new JobGraphGenerator();
	jobGen.compileJobGraph(oPlan);
}
 
Example #3
Source File: ReduceAllTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testReduce() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();

	try {
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The pact compiler is unable to compile this plan correctly");
	}
}
 
Example #4
Source File: HardPlansCompilationTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 * 
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.map(new IdentityMapper<Long>()).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
			.cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = compileNoStats(plan);

	JobGraphGenerator jobGen = new JobGraphGenerator();
	jobGen.compileJobGraph(oPlan);
}
 
Example #5
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
@Test
public void testBranchingUnion() {
	try {
		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);
		DataSet<Long> source2 = env.generateSequence(0,1);

		DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 1");

		DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");

		DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

		DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

		DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");

		DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

		DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
				.join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 2");

		join2.output(new DiscardingOutputFormat<Long>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);

		JobGraphGenerator jobGen = new JobGraphGenerator();
		
		//Compile plan to verify that no error is thrown
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #6
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
@Test
public void checkPropertyHandlingWithDecreasingParallelism() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);

	env
		.generateSequence(0, 1).setParallelism(p * 2)
		.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map1")
		.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce1")
		.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map2")
		.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce2")
		.output(new DiscardingOutputFormat<Long>()).setParallelism(p).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

	Assert.assertTrue("The no sorting local strategy.",
			LocalStrategy.SORT == red2Node.getInput().getLocalStrategy() ||
					LocalStrategy.SORT == map2Node.getInput().getLocalStrategy());

	Assert.assertTrue("The no partitioning ship strategy.",
			ShipStrategyType.PARTITION_HASH == red2Node.getInput().getShipStrategy() ||
					ShipStrategyType.PARTITION_HASH == map2Node.getInput().getShipStrategy());
}
 
Example #7
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
	final int p = DEFAULT_PARALLELISM * 2;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertTrue("Invalid ship strategy for an operator.", 
			(ShipStrategyType.PARTITION_RANDOM ==  mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || 
			(ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
 
Example #8
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
 
Example #9
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between reduce and map, via hash, because random is a full network
 * transit as well.
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType redIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
 
Example #10
Source File: WorksetIterationsRecordApiCompilerTest.java    From flink with Apache License 2.0
private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {

		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Tuple2<Long, Long>> solSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
		DataSet<Tuple2<Long, Long>> workSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
		DataSet<Tuple2<Long, Long>> invariantInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIt = solSetInput.iterateDelta(workSetInput, 100, 0).name(ITERATION_NAME);

		DataSet<Tuple2<Long, Long>> join1 = deltaIt.getWorkset().join(invariantInput).where(0).equalTo(0)
				.with(new IdentityJoiner<Tuple2<Long, Long>>())
				.withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME);

		DataSet<Tuple2<Long, Long>> join2 = deltaIt.getSolutionSet().join(join1).where(0).equalTo(0)
				.with(new IdentityJoiner<Tuple2<Long, Long>>())
				.name(JOIN_WITH_SOLUTION_SET);
		if(joinPreservesSolutionSet) {
			((JoinOperator<?,?,?>)join2).withForwardedFieldsFirst("*");
		}

		DataSet<Tuple2<Long, Long>> nextWorkset = join2.groupBy(0).reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
				.withForwardedFields("*").name(NEXT_WORKSET_REDUCER_NAME);

		if(mapBeforeSolutionDelta) {

			DataSet<Tuple2<Long, Long>> mapper = join2.map(new IdentityMapper<Tuple2<Long, Long>>())
					.withForwardedFields("*").name(SOLUTION_DELTA_MAPPER_NAME);

			deltaIt.closeWith(mapper, nextWorkset)
					.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
		}
		else {
			deltaIt.closeWith(join2, nextWorkset)
					.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		}

		return env.createProgramPlan();
	}
 
Example #11
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
@Test
public void testMultipleIterations() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(100);
	
	DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
	
	DataSet<String> reduced = input
			.map(new IdentityMapper<String>())
			.reduceGroup(new Top1GroupReducer<String>());
		
	IterativeDataSet<String> iteration1 = input.iterate(100);
	IterativeDataSet<String> iteration2 = input.iterate(20);
	IterativeDataSet<String> iteration3 = input.iterate(17);
	
	iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
			.output(new DiscardingOutputFormat<String>());
	iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
			.output(new DiscardingOutputFormat<String>());
	iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
			.output(new DiscardingOutputFormat<String>());
	
	Plan plan = env.createProgramPlan();
	
	try{
		compileNoStats(plan);
	}catch(Exception e){
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #12
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
@Test
public void testMultipleIterationsWithClosueBCVars() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(100);

	DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
		
	IterativeDataSet<String> iteration1 = input.iterate(100);
	IterativeDataSet<String> iteration2 = input.iterate(20);
	IterativeDataSet<String> iteration3 = input.iterate(17);
	
	
	iteration1.closeWith(iteration1.map(new IdentityMapper<String>()))
			.output(new DiscardingOutputFormat<String>());
	iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());
	iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());
	
	Plan plan = env.createProgramPlan();
	
	try{
		compileNoStats(plan);
	}catch(Exception e){
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #13
Source File: ParallelismChangeTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between reduce and map, via hash, because random is a full network
 * transit as well.
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType redIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
 
Example #14
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testMultipleIterations() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(100);
	
	DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
	
	DataSet<String> reduced = input
			.map(new IdentityMapper<String>())
			.reduceGroup(new Top1GroupReducer<String>());
		
	IterativeDataSet<String> iteration1 = input.iterate(100);
	IterativeDataSet<String> iteration2 = input.iterate(20);
	IterativeDataSet<String> iteration3 = input.iterate(17);
	
	iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
			.output(new DiscardingOutputFormat<String>());
	iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
			.output(new DiscardingOutputFormat<String>());
	iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
			.output(new DiscardingOutputFormat<String>());
	
	Plan plan = env.createProgramPlan();
	
	try{
		compileNoStats(plan);
	}catch(Exception e){
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #15
Source File: WorksetIterationsRecordApiCompilerTest.java    From Flink-CEPplus with Apache License 2.0
private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {

		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Tuple2<Long, Long>> solSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
		DataSet<Tuple2<Long, Long>> workSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
		DataSet<Tuple2<Long, Long>> invariantInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIt = solSetInput.iterateDelta(workSetInput, 100, 0).name(ITERATION_NAME);

		DataSet<Tuple2<Long, Long>> join1 = deltaIt.getWorkset().join(invariantInput).where(0).equalTo(0)
				.with(new IdentityJoiner<Tuple2<Long, Long>>())
				.withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME);

		DataSet<Tuple2<Long, Long>> join2 = deltaIt.getSolutionSet().join(join1).where(0).equalTo(0)
				.with(new IdentityJoiner<Tuple2<Long, Long>>())
				.name(JOIN_WITH_SOLUTION_SET);
		if(joinPreservesSolutionSet) {
			((JoinOperator<?,?,?>)join2).withForwardedFieldsFirst("*");
		}

		DataSet<Tuple2<Long, Long>> nextWorkset = join2.groupBy(0).reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
				.withForwardedFields("*").name(NEXT_WORKSET_REDUCER_NAME);

		if(mapBeforeSolutionDelta) {

			DataSet<Tuple2<Long, Long>> mapper = join2.map(new IdentityMapper<Tuple2<Long, Long>>())
					.withForwardedFields("*").name(SOLUTION_DELTA_MAPPER_NAME);

			deltaIt.closeWith(mapper, nextWorkset)
					.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
		}
		else {
			deltaIt.closeWith(join2, nextWorkset)
					.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		}

		return env.createProgramPlan();
	}
 
Example #16
Source File: ParallelismChangeTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
 
Example #17
Source File: ParallelismChangeTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
	final int p = DEFAULT_PARALLELISM * 2;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertTrue("Invalid ship strategy for an operator.", 
			(ShipStrategyType.PARTITION_RANDOM ==  mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || 
			(ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
 
Example #18
Source File: ParallelismChangeTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkPropertyHandlingWithDecreasingParallelism() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);

	env
		.generateSequence(0, 1).setParallelism(p * 2)
		.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map1")
		.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce1")
		.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map2")
		.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce2")
		.output(new DiscardingOutputFormat<Long>()).setParallelism(p).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

	Assert.assertTrue("The no sorting local strategy.",
			LocalStrategy.SORT == red2Node.getInput().getLocalStrategy() ||
					LocalStrategy.SORT == map2Node.getInput().getLocalStrategy());

	Assert.assertTrue("The no partitioning ship strategy.",
			ShipStrategyType.PARTITION_HASH == red2Node.getInput().getShipStrategy() ||
					ShipStrategyType.PARTITION_HASH == map2Node.getInput().getShipStrategy());
}
 
Example #19
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBranchingUnion() {
	try {
		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);
		DataSet<Long> source2 = env.generateSequence(0,1);

		DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 1");

		DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");

		DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

		DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

		DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");

		DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

		DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
				.join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 2");

		join2.output(new DiscardingOutputFormat<Long>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);

		JobGraphGenerator jobGen = new JobGraphGenerator();
		
		//Compile plan to verify that no error is thrown
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #20
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testMultipleIterationsWithClosueBCVars() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(100);

	DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
		
	IterativeDataSet<String> iteration1 = input.iterate(100);
	IterativeDataSet<String> iteration2 = input.iterate(20);
	IterativeDataSet<String> iteration3 = input.iterate(17);
	
	
	iteration1.closeWith(iteration1.map(new IdentityMapper<String>()))
			.output(new DiscardingOutputFormat<String>());
	iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());
	iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());
	
	Plan plan = env.createProgramPlan();
	
	try{
		compileNoStats(plan);
	}catch(Exception e){
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 