Java Code Examples for org.apache.flink.api.common.Plan

The following examples show how to use org.apache.flink.api.common.Plan. These examples are extracted from open source projects; each one links back to its original project and source file.
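Before looking at the individual examples, the snippet below sketches the common lifecycle they all share: a DataSet program is defined against an ExecutionEnvironment, turned into a Plan via createProgramPlan(), compiled into an OptimizedPlan by the Optimizer, and finally translated into a JobGraph. This is a minimal, self-contained sketch against the Flink 1.x batch API (it assumes the flink-java and flink-optimizer modules are on the classpath); the class name and job name are illustrative only.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;

public class PlanLifecycleSketch {

	public static void main(String[] args) throws Exception {
		// 1. define a program against an ExecutionEnvironment
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Long> data = env.generateSequence(1, 100);
		data.output(new DiscardingOutputFormat<Long>());

		// 2. turn the program into a Plan, the common API representation of the job
		Plan plan = env.createProgramPlan("lifecycle-sketch");

		// 3. compile the Plan into an OptimizedPlan ...
		Optimizer optimizer = new Optimizer(new DataStatistics(), new Configuration());
		OptimizedPlan optimizedPlan = optimizer.compile(plan);

		// 4. ... and translate it into a JobGraph that can be submitted for execution
		new JobGraphGenerator().compileJobGraph(optimizedPlan);
	}
}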
Example 1
Source Project: flink   Source File: HardPlansCompilationTest.java    License: Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 * 
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.map(new IdentityMapper<Long>()).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
			.cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = compileNoStats(plan);

	JobGraphGenerator jobGen = new JobGraphGenerator();
	jobGen.compileJobGraph(oPlan);
}
 
Example 2
Source Project: flink   Source File: RemoteEnvironment.java    License: Apache License 2.0
@Override
public String getExecutionPlan() throws Exception {
	Plan p = createProgramPlan("plan", false);

	// make sure that we do not start a new executor here
	// if one runs, fine; if not, we create a local executor (lightweight) and let it
	// generate the plan
	if (executor != null) {
		return executor.getOptimizerPlanAsJSON(p);
	}
	else {
		PlanExecutor le = PlanExecutor.createLocalExecutor(null);
		String plan = le.getOptimizerPlanAsJSON(p);

		le.stop();

		return plan;
	}
}
 
Example 3
Source Project: flink   Source File: ClientTest.java    License: Apache License 2.0
@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
	PackagedProgram prg = PackagedProgram.newBuilder()
		.setEntryPointClassName(TestOptimizerPlan.class.getName())
		.setArguments("/dev/random", "/tmp")
		.build();

	Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
	Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, new Configuration(), 1, true);
	OptimizedPlan op = optimizer.compile(plan);
	assertNotNull(op);

	PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
	assertNotNull(dumper.getOptimizerPlanAsJSON(op));

	// test HTML escaping
	PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
	dumper2.setEncodeForHTML(true);
	String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

	assertEquals(-1, htmlEscaped.indexOf('\\'));
}
 
Example 4
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant
 */
@Test
public void testRightSideCountercheck() {
	try {
		
		Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy()); 
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example 5
Source Project: Flink-CEPplus   Source File: Optimizer.java    License: Apache License 2.0
private OptimizerPostPass getPostPassFromPlan(Plan program) {
	final String className = program.getPostPassClassName();
	if (className == null) {
		throw new CompilerException("Optimizer Post Pass class description is null");
	}
	try {
		Class<? extends OptimizerPostPass> clazz = Class.forName(className).asSubclass(OptimizerPostPass.class);
		try {
			return InstantiationUtil.instantiate(clazz, OptimizerPostPass.class);
		} catch (RuntimeException rtex) {
			// unwrap the source exception
			if (rtex.getCause() != null) {
				throw new CompilerException("Cannot instantiate optimizer post pass: " + rtex.getMessage(), rtex.getCause());
			} else {
				throw rtex;
			}
		}
	}
	catch (ClassNotFoundException cnfex) {
		throw new CompilerException("Cannot load Optimizer post-pass class '" + className + "'.", cnfex);
	}
	catch (ClassCastException ccex) {
		throw new CompilerException("Class '" + className + "' is not an optimizer post-pass.", ccex);
	}
}
 
Example 6
Source Project: flink   Source File: UnionReplacementTest.java    License: Apache License 2.0
@Test
public void testUnionReplacement() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<String> input1 = env.fromElements("test1");
		DataSet<String> input2 = env.fromElements("test2");

		DataSet<String> union = input1.union(input2);

		union.output(new DiscardingOutputFormat<String>());
		union.output(new DiscardingOutputFormat<String>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 7
/**
 * This tests whether a HYBRIDHASH_BUILD_SECOND is correctly transformed to a HYBRIDHASH_BUILD_SECOND_CACHED
 * when it is inside of an iteration and on the static path.
 */
@Test
public void testRightSide() {
	try {
		
		Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy()); 
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example 8
@Test
public void testUnaryFunctionAllForwardedExceptAnnotation() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
	input.map(new AllForwardedExceptMapper<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertTrue(fw1.contains(0));
	assertTrue(fw2.contains(2));
}
 
Example 9
Source Project: flink   Source File: CachedMatchStrategyCompilerTest.java    License: Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant
 */
@Test
public void testLeftSideCountercheck() {
	try {
		
		Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, innerJoin.getDriverStrategy());
		assertEquals(TempMode.CACHED, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example 10
Source Project: flink   Source File: SemanticPropertiesPrecedenceTest.java    License: Apache License 2.0
@Test
public void testFunctionForwardedAnnotationPrecedence() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(Tuple3.of(3L, "test", 42));
	input
			.map(new WildcardForwardedMapperWithForwardAnnotation<Tuple3<Long, String, Integer>>())
			.output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 1);
	FieldSet fw3 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertNotNull(fw3);
	assertTrue(fw1.contains(0));
	assertFalse(fw2.contains(1));
	assertFalse(fw3.contains(2));
}
 
Example 11
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0
/**
 * Tests compiler fail for join program with replicated data source behind rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.rebalance()
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
 
Example 12
Source Project: flink   Source File: OperatorResolver.java    License: Apache License 2.0
public OperatorResolver(Plan p) {
	this.map = new HashMap<String, List<Operator<?>>>();
	this.seen = new HashSet<Operator<?>>();
	
	p.accept(this);
	this.seen = null;
}
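For context: Plan implements Visitable<Operator<?>>, so calling p.accept(this), as the constructor above does, walks the operator DAG starting from the data sinks and invokes preVisit/postVisit on every operator. A minimal sketch of such a visitor follows; the class name OperatorNameCollector is hypothetical, but Visitor and Operator are the real Flink types.

import java.util.ArrayList;
import java.util.List;

import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.util.Visitor;

// hypothetical helper: collects the name of every operator reachable from the plan's sinks
public class OperatorNameCollector implements Visitor<Operator<?>> {

	private final List<String> names = new ArrayList<>();

	@Override
	public boolean preVisit(Operator<?> visitable) {
		names.add(visitable.getName());
		return true; // returning true descends into this operator's inputs
	}

	@Override
	public void postVisit(Operator<?> visitable) {
		// nothing to clean up on the way back
	}

	public List<String> getNames() {
		return names;
	}
}

Usage mirrors the constructor above: create the visitor, call plan.accept(collector), then read collector.getNames().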
 
Example 13
Source Project: flink   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource3() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsGroupedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 14
Source Project: flink   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test
public void testRangePartitionOperatorPreservesFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));

		data.partitionByRange(1)
			.groupBy(1)
			.reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long,Long>>())
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode partitionNode = (SingleInputPlanNode)reducer.getInput().getSource();
		SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());

		SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
		List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
		assertEquals(2, sourceOutgoingChannels.size());
		assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
		assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
		assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(1).getDataExchangeMode());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 15
Source Project: flink   Source File: LocalEnvironment.java    License: Apache License 2.0
@Override
public String getExecutionPlan() throws Exception {
	Plan p = createProgramPlan(null, false);

	// make sure that we do not start an executor in any case here.
	// if one runs, fine; if not, we only create a local executor and discard it immediately afterwards
	if (executor != null) {
		return executor.getOptimizerPlanAsJSON(p);
	}
	else {
		PlanExecutor tempExecutor = PlanExecutor.createLocalExecutor(configuration);
		return tempExecutor.getOptimizerPlanAsJSON(p);
	}
}
 
Example 16
Source Project: Flink-CEPplus   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test
public void testPartitionCustomOperatorPreservesFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
		
		data.partitionCustom(new Partitioner<Long>() {
				public int partition(Long key, int numPartitions) { return key.intValue(); }
			}, 1)
			.groupBy(1)
			.reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode partitioner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 17
Source Project: flink   Source File: SemanticPropertiesProjectionTest.java    License: Apache License 2.0
@Test
public void testCrossProjectionSemProps1() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

	tupleDs.cross(tupleDs)
			.projectFirst(2, 3)
			.projectSecond(1, 4)
			.output(new DiscardingOutputFormat<Tuple>());

	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	CrossOperatorBase<?, ?, ?, ?> projectCrossOperator = ((CrossOperatorBase<?, ?, ?, ?>) sink.getInput());

	DualInputSemanticProperties props = projectCrossOperator.getSemanticProperties();

	assertEquals(1, props.getForwardingTargetFields(0, 2).size());
	assertEquals(1, props.getForwardingTargetFields(0, 3).size());
	assertEquals(1, props.getForwardingTargetFields(1, 1).size());
	assertEquals(1, props.getForwardingTargetFields(1, 4).size());

	assertTrue(props.getForwardingTargetFields(0, 2).contains(0));
	assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
	assertTrue(props.getForwardingTargetFields(1, 1).contains(2));
	assertTrue(props.getForwardingTargetFields(1, 4).contains(3));
}
 
Example 18
Source Project: flink   Source File: RelationalQueryCompilerTest.java    License: Apache License 2.0
private void testQueryGeneric(long orderSize, long lineItemSize,
		float ordersFilterFactor, float joinFilterFactor,
		boolean broadcastOkay, boolean partitionedOkay,
		boolean hashJoinFirstOkay, boolean hashJoinSecondOkay, boolean mergeJoinOkay) throws Exception {
	Plan p = getTPCH3Plan();
	p.setExecutionConfig(defaultExecutionConfig);
	testQueryGeneric(p, orderSize, lineItemSize, ordersFilterFactor, joinFilterFactor, broadcastOkay, partitionedOkay, hashJoinFirstOkay, hashJoinSecondOkay, mergeJoinOkay);
}
 
Example 19
Source Project: Flink-CEPplus   Source File: PipelineBreakerTest.java    License: Apache License 2.0
@Test
public void testPipelineBreakerBroadcastedAllReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
		env.setParallelism(64);
		
		DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
		
		DataSet<Long> bcInput1 = sourceWithMapper
									.map(new IdentityMapper<Long>())
									.reduce(new SelectOneReducer<Long>());
		DataSet<Long> bcInput2 = env.generateSequence(1, 10);
		
		DataSet<Long> result = sourceWithMapper
				.map(new IdentityMapper<Long>())
						.withBroadcastSet(bcInput1, "bc1")
						.withBroadcastSet(bcInput2, "bc2");
		
		result.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();

		assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
		assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 20
Source Project: flink   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource4() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0, 1)
			.splitsOrderedBy(new int[]{1}, new Order[]{Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 21
Source Project: flink   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsGroupedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(0, 1)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 22
Source Project: flink   Source File: NestedIterationsTest.java    License: Apache License 2.0
@Test
public void testBulkIterationInClosure() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Long> data1 = env.generateSequence(1, 100);
		DataSet<Long> data2 = env.generateSequence(1, 100);
		
		IterativeDataSet<Long> firstIteration = data1.iterate(100);
		
		DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));
		
		
		IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
		
		DataSet<Long> joined = mainIteration.join(firstResult)
				.where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
				.with(new DummyFlatJoinFunction<Long>());
		
		DataSet<Long> mainResult = mainIteration.closeWith(joined);
		
		mainResult.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		
		// optimizer should be able to translate this
		OptimizedPlan op = compileNoStats(p);
		
		// job graph generator should be able to translate this
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 23
Source Project: Flink-CEPplus   Source File: BranchingPlansCompilerTest.java    License: Apache License 2.0
@Test
public void testBranchingUnion() {
	try {
		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);
		DataSet<Long> source2 = env.generateSequence(0,1);

		DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 1");

		DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");

		DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

		DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

		DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");

		DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

		DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
				.join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 2");

		join2.output(new DiscardingOutputFormat<Long>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);

		JobGraphGenerator jobGen = new JobGraphGenerator();
		
		// compile the plan to verify that no error is thrown
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 24
Source Project: flink   Source File: KMeansSingleStepTest.java    License: Apache License 2.0
@Test
public void testCompileKMeansSingleStepWithStats() {

	Plan p = getKMeansPlan();
	p.setExecutionConfig(new ExecutionConfig());
	// set the statistics
	OperatorResolver cr = getContractResolver(p);
	GenericDataSourceBase<?, ?> pointsSource = cr.getNode(DATAPOINTS);
	GenericDataSourceBase<?, ?> centersSource = cr.getNode(CENTERS);
	setSourceStatistics(pointsSource, 100L * 1024 * 1024 * 1024, 32f);
	setSourceStatistics(centersSource, 1024 * 1024, 32f);

	OptimizedPlan plan = compileWithStats(p);
	checkPlan(plan);
}
 
Example 25
Source Project: flink   Source File: ReplicatingDataSourceTest.java    License: Apache License 2.0
/**
 * Tests join program with replicated data source behind map partition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have a forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 26
Source Project: Flink-CEPplus   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource4() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0, 1)
			.splitsGroupedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 27
Source Project: Flink-CEPplus   Source File: BranchingPlansCompilerTest.java    License: Apache License 2.0
@Test
public void testCostComputationWithMultipleDataSinks() {
	final int SINKS = 5;

	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);

		DataSet<Long> source = env.generateSequence(1, 10000);

		DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
		DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());

		for (int sink = 0; sink < SINKS; sink++) {
			mappedA.output(new DiscardingOutputFormat<Long>());
			mappedC.output(new DiscardingOutputFormat<Long>());
		}

		Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
		OptimizedPlan oPlan = compileNoStats(plan);

		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 28
Source Project: flink   Source File: PlanTranslator.java    License: Apache License 2.0
private JobGraph compilePlan(Plan plan, Configuration optimizerConfiguration) {
	Optimizer optimizer = new Optimizer(new DataStatistics(), optimizerConfiguration);
	OptimizedPlan optimizedPlan = optimizer.compile(plan);

	JobGraphGenerator jobGraphGenerator = new JobGraphGenerator(optimizerConfiguration);
	return jobGraphGenerator.compileJobGraph(optimizedPlan, plan.getJobId());
}
 
Example 29
Source Project: Flink-CEPplus   Source File: DistinctTranslationTest.java    License: Apache License 2.0
@Test
public void translateDistinctPosition() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

		initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		// currently distinct is translated to a Reduce
		ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

		// check types
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

		// check keys
		assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0));

		// parallelism was not configured on the operator
		assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

		assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}