Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#setParallelism()

The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#setParallelism(). Each example is drawn from an open-source project; the source file, project, and license are noted above each listing.
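Before the project examples, here is a minimal, self-contained sketch (not taken from any project listed below) of the basic usage pattern: calling setParallelism() on the ExecutionEnvironment sets the default parallelism for every operator in the job, while calling setParallelism() on an individual operator overrides that default for just that operator.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class SetParallelismSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4); // default parallelism for all operators of this job

		DataSet<Long> numbers = env.generateSequence(1, 100);

		numbers
				.map(new MapFunction<Long, Long>() {
					@Override
					public Long map(Long value) {
						return value * 2;
					}
				})
				.setParallelism(2) // per-operator override of the environment default
				.print();
	}
}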
Example 1
Source File: SortPartitionITCase.java    From flink with Apache License 2.0
@Test
public void testSortPartitionByNestedFieldExpression() throws Exception {
	/*
	 * Test sort partition on nested field expressions
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(3);

	DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			.map(new IdMapper<Tuple2<Tuple2<Integer, Integer>, String>>()).setParallelism(3) // parallelize input
			.sortPartition("f0.f1", Order.ASCENDING)
			.sortPartition("f1", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new NestedTupleChecker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 2
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests that the compiler fails for a join program with a replicated data source behind a rebalance.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindRebalance() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.rebalance()
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
}
 
Example 3
Source File: BootstrapTransformationTest.java    From flink with Apache License 2.0
@Test
public void testOperatorSpecificMaxParallelismRespected() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSource<Integer> input = env.fromElements(0);

	BootstrapTransformation<Integer> transformation = OperatorTransformation
		.bootstrapWith(input)
		.setMaxParallelism(1)
		.transform(new ExampleStateBootstrapFunction());

	int maxParallelism = transformation.getMaxParallelism(4);
	DataSet<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(
		OperatorIDGenerator.fromUid("uid"),
		new MemoryStateBackend(),
		new Path(),
		maxParallelism
	);

	Assert.assertEquals("The parallelism of a data set should be constrained my the savepoint max parallelism", 1, getParallelism(result));
}
 
Example 4
Source File: SortPartitionITCase.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSortPartitionByFieldExpression() throws Exception {
	/*
	 * Test sort partition on field expression
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			.map(new IdMapper()).setParallelism(4) // parallelize input
			.sortPartition("f1", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 5
Source File: SortPartitionITCase.java    From flink with Apache License 2.0
@Test
public void testSortPartitionByTwoFieldExpressions() throws Exception {
	/*
	 * Test sort partition on two field expressions
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(2);

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.get5TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
			.map(new IdMapper<Tuple5<Integer, Long, Integer, String, Long>>()).setParallelism(2) // parallelize input
			.sortPartition("f4", Order.ASCENDING)
			.sortPartition("f2", Order.DESCENDING)
			.mapPartition(new OrderCheckMapper<>(new Tuple5Checker()))
			.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 6
Source File: GroupReduceITCase.java    From flink with Apache License 2.0
@Test
public void testTupleKeySelectorGroupSort() throws Exception {
	/*
	 * check correctness of sorted groupReduce on tuples with keyselector sorting
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> reduceDs = ds
			.groupBy(new LongFieldExtractor<Tuple3<Integer, Long, String>>(1))
			.sortGroup(new StringFieldExtractor<Tuple3<Integer, Long, String>>(2), Order.DESCENDING)
			.reduceGroup(new Tuple3SortedGroupReduce());

	List<Tuple3<Integer, Long, String>> result = reduceDs.collect();

	String expected = "1,1,Hi\n"
			+
			"5,2,Hello world-Hello\n" +
			"15,3,Luke Skywalker-I am fine.-Hello world, how are you?\n" +
			"34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" +
			"65,5,Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" +
			"111,6,Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n";

	compareResultAsTuples(result, expected);
}
 
Example 7
Source File: IPv6HostnamesITCase.java    From flink with Apache License 2.0
@Test
public void testClusterWithIPv6host() {
	try {

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		env.getConfig().disableSysoutLogging();

		// get input data
		DataSet<String> text = env.fromElements(WordCountData.TEXT.split("\n"));

		DataSet<Tuple2<String, Integer>> counts = text
				.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
					@Override
					public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
						for (String token : value.toLowerCase().split("\\W+")) {
							if (token.length() > 0) {
								out.collect(new Tuple2<String, Integer>(token, 1));
							}
						}
					}
				})
				.groupBy(0).sum(1);

		List<Tuple2<String, Integer>> result = counts.collect();

		TestBaseUtils.compareResultAsText(result, WordCountData.COUNTS_AS_TUPLES);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 8
Source File: FlinkTableITCase.java    From flink-connectors with Apache License 2.0
@Test
public void testBatchTableSinkUsingDescriptor() throws Exception {

    // create a Pravega stream for test purposes
    Stream stream = Stream.of(setupUtils.getScope(), "testBatchTableSinkUsingDescriptor");
    this.setupUtils.createTestStream(stream.getStreamName(), 1);

    // create a Flink Table environment
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(1);
    BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env);

    Table table = tableEnv.fromDataSet(env.fromCollection(SAMPLES));

    Pravega pravega = new Pravega();
    pravega.tableSinkWriterBuilder()
            .withRoutingKeyField("category")
            .forStream(stream)
            .withPravegaConfig(setupUtils.getPravegaConfig());

    ConnectTableDescriptor desc = tableEnv.connect(pravega)
            .withFormat(new Json().failOnMissingField(true))
            .withSchema(new Schema()
                    .field("category", DataTypes.STRING())
                    .field("value", DataTypes.INT()));
    desc.createTemporaryTable("test");

    final Map<String, String> propertiesMap = desc.toProperties();
    final TableSink<?> sink = TableFactoryService.find(BatchTableSinkFactory.class, propertiesMap)
            .createBatchTableSink(propertiesMap);

    String tableSinkPath = tableEnv.getCurrentDatabase() + "." + "PravegaSink";

    ConnectorCatalogTable<?, ?> connectorCatalogSinkTable = ConnectorCatalogTable.sink(sink, true);

    tableEnv.getCatalog(tableEnv.getCurrentCatalog()).get().createTable(
            ObjectPath.fromString(tableSinkPath),
            connectorCatalogSinkTable, false);
    table.insertInto("PravegaSink");
    env.execute();
}
 
Example 9
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests join program with replicated data source behind filter.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFilter() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.filter(new NoFilter())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 10
Source File: BatchPojoExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(1);

		List<CustomCassandraAnnotatedPojo> customCassandraAnnotatedPojos = IntStream.range(0, 20)
				.mapToObj(x -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), x, 0))
				.collect(Collectors.toList());

		DataSet<CustomCassandraAnnotatedPojo> dataSet = env.fromCollection(customCassandraAnnotatedPojos);

		ClusterBuilder clusterBuilder = new ClusterBuilder() {
			private static final long serialVersionUID = -1754532803757154795L;

			@Override
			protected Cluster buildCluster(Cluster.Builder builder) {
				return builder.addContactPoints("127.0.0.1").build();
			}
		};

		dataSet.output(new CassandraPojoOutputFormat<>(clusterBuilder, CustomCassandraAnnotatedPojo.class, () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)}));

		env.execute("Write");

		/*
		 *	This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat.
		 */
		DataSet<CustomCassandraAnnotatedPojo> inputDS = env
			.createInput(new CassandraPojoInputFormat<>(
				SELECT_QUERY,
				clusterBuilder,
				CustomCassandraAnnotatedPojo.class,
				() -> new Mapper.Option[]{Mapper.Option.consistencyLevel(ConsistencyLevel.ANY)}
			));

		inputDS.print();
	}
 
Example 11
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedSource1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 12
Source File: JoinITCase.java    From flink with Apache License 2.0
@Test
public void testJoinWithRangePartitioning() throws Exception {
	/*
	 * Test Join on tuples with multiple key field positions and same customized distribution
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution testDis = new TestDistribution();
	DataSet<Tuple2<String, String>> joinDs =
			DataSetUtils.partitionByRange(ds1, testDis, 0, 1)
					.join(DataSetUtils.partitionByRange(ds2, testDis, 0, 4))
					.where(0, 1)
					.equalTo(0, 4)
					.with(new T3T5FlatJoin());

	List<Tuple2<String, String>> result = joinDs.collect();

	String expected = "Hi,Hallo\n" +
			"Hello,Hallo Welt\n" +
			"Hello world,Hallo Welt wie gehts?\n" +
			"Hello world,ABC\n" +
			"I am fine.,HIJ\n" +
			"I am fine.,IJK\n";

	compareResultAsTuples(result, expected);
}
 
Example 13
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 14
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
	final int p = DEFAULT_PARALLELISM * 2;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances, and key/value pairs with the same key must be processed by the
	// same mapper and reducer instance, respectively
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertTrue("Invalid ship strategy for an operator.", 
			(ShipStrategyType.PARTITION_RANDOM ==  mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || 
			(ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
 
Example 15
Source File: BatchPojoExample.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        List<CustomCassandraAnnotatedPojo> customCassandraAnnotatedPojos = IntStream.range(0, 20)
                .mapToObj(x -> new CustomCassandraAnnotatedPojo(UUID.randomUUID().toString(), x, 0))
                .collect(Collectors.toList());

        DataSet<CustomCassandraAnnotatedPojo> dataSet = env.fromCollection(customCassandraAnnotatedPojos);

        ClusterBuilder clusterBuilder = new ClusterBuilder() {
            private static final long serialVersionUID = -1754532803757154795L;

            @Override
            protected Cluster buildCluster(Cluster.Builder builder) {
                return builder.addContactPoints("127.0.0.1").build();
            }
        };

        dataSet.output(new CassandraPojoOutputFormat<>(clusterBuilder, CustomCassandraAnnotatedPojo.class, () -> new Mapper.Option[]{Mapper.Option.saveNullFields(true)}));

        env.execute("zhisheng");

        /*
         *	This is for the purpose of showing an example of creating a DataSet using CassandraPojoInputFormat.
         */
        DataSet<CustomCassandraAnnotatedPojo> inputDS = env
                .createInput(new CassandraPojoInputFormat<>(
                        SELECT_QUERY,
                        clusterBuilder,
                        CustomCassandraAnnotatedPojo.class,
                        () -> new Mapper.Option[]{Mapper.Option.consistencyLevel(ConsistencyLevel.ANY)}
                ));

        inputDS.print();
    }
 
Example 16
Source File: CoGroupITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCoGroupWithRangePartitioning() throws Exception {
	/*
	 * Test coGroup on tuples with multiple key field positions and same customized distribution
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds1 = CollectionDataSets.get5TupleDataSet(env);
	DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.get3TupleDataSet(env);

	env.setParallelism(4);
	TestDistribution testDis = new TestDistribution();
	DataSet<Tuple3<Integer, Long, String>> coGrouped =
			DataSetUtils.partitionByRange(ds1, testDis, 0, 4)
					.coGroup(DataSetUtils.partitionByRange(ds2, testDis, 0, 1))
					.where(0, 4)
					.equalTo(0, 1)
					.with(new Tuple5Tuple3CoGroup());

	List<Tuple3<Integer, Long, String>> result = coGrouped.collect();

	String expected = "1,1,Hallo\n" +
			"2,2,Hallo Welt\n" +
			"3,2,Hallo Welt wie gehts?\n" +
			"3,2,ABC\n" +
			"5,3,HIJ\n" +
			"5,3,IJK\n";

	compareResultAsTuples(result, expected);
}
 
Example 17
Source File: ReduceCompilationTest.java    From flink with Apache License 2.0
/**
 * Test program compilation when the Reduce's combiner has been excluded
 * by setting {@code CombineHint.NONE}.
 */
@Test
public void testGroupedReduceWithoutCombiner() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(8);

	DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
		.name("source").setParallelism(6);

	data
		.groupBy(0)
		.reduce(new RichReduceFunction<Tuple2<String, Double>>() {
			@Override
			public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
				return null;
			}
		}).setCombineHint(CombineHint.NONE).name("reducer")
		.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

	Plan p = env.createProgramPlan();
	OptimizedPlan op = compileNoStats(p);

	OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

	// get the original nodes
	SourcePlanNode sourceNode = resolver.getNode("source");
	SingleInputPlanNode reduceNode = resolver.getNode("reducer");
	SinkPlanNode sinkNode = resolver.getNode("sink");

	// check wiring
	assertEquals(sourceNode, reduceNode.getInput().getSource());

	// check the strategies
	assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());

	// check the keys
	assertEquals(new FieldList(0), reduceNode.getKeys(0));
	assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

	// check parallelism
	assertEquals(6, sourceNode.getParallelism());
	assertEquals(8, reduceNode.getParallelism());
	assertEquals(8, sinkNode.getParallelism());
}
 
Example 18
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *                              SINK
 *                               |
 *                            COGROUP
 *                        +---/    \----+
 *                       /               \
 *                      /             MATCH10
 *                     /               |    \
 *                    /                |  MATCH9
 *                MATCH5               |  |   \
 *                |   \                |  | MATCH8
 *                | MATCH4             |  |  |   \
 *                |  |   \             |  |  | MATCH7
 *                |  | MATCH3          |  |  |  |   \
 *                |  |  |   \          |  |  |  | MATCH6
 *                |  |  | MATCH2       |  |  |  |  |  |
 *                |  |  |  |   \       +--+--+--+--+--+
 *                |  |  |  | MATCH1            MAP 
 *                \  |  |  |  |  | /-----------/
 *                (DATA SOURCE ONE)
 * </pre>
 */
@Test
public void testBranchingSourceMultipleTimes() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);

		DataSet<Tuple2<Long, Long>> source = env.generateSequence(1, 10000000)
			.map(new Duplicator<Long>());

		DataSet<Tuple2<Long, Long>> joined1 = source.join(source).where(0).equalTo(0)
													.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined2 = source.join(joined1).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined3 = source.join(joined2).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined4 = source.join(joined3).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined5 = source.join(joined4).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> mapped = source.map(
				new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
						return null;
					}
		});

		DataSet<Tuple2<Long, Long>> joined6 = mapped.join(mapped).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined7 = mapped.join(joined6).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined8 = mapped.join(joined7).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined9 = mapped.join(joined8).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

		DataSet<Tuple2<Long, Long>> joined10 = mapped.join(joined9).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());


		joined5.coGroup(joined10)
				.where(1).equalTo(1)
				.with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>())

			.output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 19
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
@Test
public void testTwoIterationsDirectlyChained() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);
		
		DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
		
		DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);
		
		depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
		
		WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
		BulkIterationPlanNode bipn = (BulkIterationPlanNode)wipn.getInput1().getSource();

		// the hash partitioning has been pushed out of the delta iteration into the bulk iteration
		assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

		// the input of the root step function is the last operator of the step function
		// since the work has been pushed out of the bulk iteration, it has to guarantee the hash partitioning
		for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
		assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
		
		assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
		assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 20
Source File: dVMPTest.java    From toolbox with Apache License 2.0
public void testingMLParallelWasteHidden() throws IOException, ClassNotFoundException {
    //Set-up Flink session.
    Configuration conf = new Configuration();
    conf.setInteger("taskmanager.network.numberOfBuffers", 12000);
    final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
    env.getConfig().disableSysoutLogging();
    env.setParallelism(Main.PARALLELISM);


    // load the true WasteIncinerator Bayesian network
    BayesianNetwork wasteIncinerator = BayesianNetworkLoader.loadFromFile("../networks/simulated/WasteIncinerator.bn");
    wasteIncinerator.randomInitialization(new Random(0));
    if (Main.VERBOSE) System.out.println("\nAsia network \n ");
    //if (Main.VERBOSE) System.out.println(asianet.getDAG().outputString());
    if (Main.VERBOSE) System.out.println(wasteIncinerator.toString());

    //Sampling from WasteIncinerator BN
    BayesianNetworkSampler sampler = new BayesianNetworkSampler(wasteIncinerator);
    sampler.setSeed(0);
    //Load the sampled data
    DataStream<DataInstance> data = sampler.sampleToDataStream(1000);
    sampler.setHiddenVar(wasteIncinerator.getVariables().getVariableById(6));
    DataStreamWriter.writeDataToFile(data, "../datasets/simulated/tmp.arff");

    //We load the data
    DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "../datasets/simulated/tmp.arff", false);


    //ParallelVB is defined
    dVMP parallelVB = new dVMP();
    parallelVB.setOutput(true);
    parallelVB.setSeed(5);
    parallelVB.setBatchSize(100);
    parallelVB.setLocalThreshold(0.001);
    parallelVB.setGlobalThreshold(0.001);
    parallelVB.setMaximumLocalIterations(100);
    parallelVB.setMaximumGlobalIterations(100);

    //Setting DAG
    parallelVB.setDAG(wasteIncinerator.getDAG());


    //Setting the distributed data source
    parallelVB.initLearning();
    parallelVB.updateModel(dataFlink);

    BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork();

    if (Main.VERBOSE) System.out.println(bnet.toString());
}