Java Code Examples for org.apache.flink.api.java.DataSet#writeAsText()

The following examples show how to use org.apache.flink.api.java.DataSet#writeAsText(). They are drawn from open source projects; the source file and originating project are listed above each example.
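All of them follow the same basic pattern: build a DataSet, call writeAsText(path), optionally passing a WriteMode so existing output may be overwritten, and then trigger the job with env.execute(), since writeAsText() only registers a data sink and does not run anything by itself. As a quick orientation before the examples, here is a minimal, self-contained sketch of that pattern; the class name, output path, and input elements are placeholders and do not come from any of the projects below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.core.fs.FileSystem.WriteMode;

public class WriteAsTextSketch {

	public static void main(String[] args) throws Exception {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// any DataSet works; writeAsText() writes the toString() of each element as one line
		DataSet<String> lines = env.fromElements("first line", "second line");

		// writes a file (or a directory of files, depending on the sink parallelism)
		// and overwrites previous output if it already exists
		lines.writeAsText("file:///tmp/write-as-text-sketch", WriteMode.OVERWRITE);

		// writeAsText() is lazy: the job only runs when execute() is called
		env.execute("writeAsText() sketch");
	}
}

The examples that follow apply these same three steps with Hadoop function wrappers, custom POJOs, Avro records, and iterations in between.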
Example 1
Source File: HadoopReduceCombineFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testCombiner() throws Exception {
	org.junit.Assume.assumeThat(mode, new IsEqual<TestExecutionMode>(TestExecutionMode.CLUSTER));
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper3());

	DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
			groupBy(0).
			reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new KeyChangingReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	counts.writeAsText(resultPath);
	env.execute();

	String expected = "(0,5)\n" +
			"(1,6)\n" +
			"(2,5)\n" +
			"(3,5)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 2
Source File: HadoopReduceCombineFunctionITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testStandardCountingWithCombiner() throws Exception{
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper1());

	DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
			groupBy(0).
			reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new SumReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	counts.writeAsText(resultPath);
	env.execute();

	String expected = "(0,5)\n" +
			"(1,6)\n" +
			"(2,6)\n" +
			"(3,4)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 3
Source File: HadoopReduceCombineFunctionITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper2());

	DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
			reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
					new SumReducer(), new SumReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	sum.writeAsText(resultPath);
	env.execute();

	String expected = "(0,231)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 4
Source File: HadoopMapFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testConfigurableMapper() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	JobConf conf = new JobConf();
	conf.set("my.filterPrefix", "Hello");

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
	DataSet<Tuple2<IntWritable, Text>> hellos = ds.
			flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));

	String resultPath = tempFolder.newFile().toURI().toString();

	hellos.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	String expected = "(2,Hello)\n" +
			"(3,Hello world)\n" +
			"(4,Hello world, how are you?)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 5
Source File: Readonly.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	
	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	System.err.println("Using input="+args[0]);
	// get input data
	DataSet<String> text = env.readTextFile(args[0]);
	DataSet<String> res = text.filter(new FilterFunction<String>() {
		@Override
		public boolean filter(String value) throws Exception {
			return false;
		}
	});
	res.writeAsText("file:///tmp/out", WriteMode.OVERWRITE);
	
	// execute program
	env.execute("Read only job");
}
 
Example 6
Source File: AvroTypeExtractionTest.java    From flink with Apache License 2.0
@Test
public void testWithAvroGenericSer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableForceAvro();
	Path in = new Path(inFile.getAbsoluteFile().toURI());

	AvroInputFormat<User> users = new AvroInputFormat<>(in, User.class);
	DataSet<User> usersDS = env.createInput(users);

	DataSet<Tuple2<String, Integer>> res = usersDS
		.groupBy((KeySelector<User, String>) value -> String.valueOf(value.getName()))
		.reduceGroup((GroupReduceFunction<User, Tuple2<String, Integer>>) (values, out) -> {
			for (User u : values) {
				out.collect(new Tuple2<>(u.getName().toString(), 1));
			}
		})
		.returns(Types.TUPLE(Types.STRING, Types.INT));

	res.writeAsText(resultPath);
	env.execute("Avro Key selection");

	expected = "(Charlie,1)\n(Alyssa,1)\n";
}
 
Example 7
Source File: HadoopReduceFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testStandardGrouping() throws Exception{
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper1());

	DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
			groupBy(0).
			reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new CommentCntReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	commentCnts.writeAsText(resultPath);
	env.execute();

	String expected = "(0,0)\n" +
			"(1,3)\n" +
			"(2,5)\n" +
			"(3,5)\n" +
			"(4,2)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 8
Source File: WordCountSubclassPOJOITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> text = env.readTextFile(textPath);

	DataSet<WCBase> counts = text
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WCBase>() {
				private static final long serialVersionUID = 1L;
				public WCBase reduce(WCBase value1, WCBase value2) {
					WC wc1 = (WC) value1;
					WC wc2 = (WC) value2;
					return new WC(value1.word, wc1.secretCount + wc2.secretCount);
				}
			})
			.map(new MapFunction<WCBase, WCBase>() {
				@Override
				public WCBase map(WCBase value) throws Exception {
					WC wc = (WC) value;
					wc.count = wc.secretCount;
					return wc;
				}
			});

	counts.writeAsText(resultPath);

	env.execute("WordCount with custom data types example");
}
 
Example 9
Source File: HadoopReduceFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testConfigurationViaJobConf() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	JobConf conf = new JobConf();
	conf.set("my.cntPrefix", "Hello");

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper2());

	DataSet<Tuple2<IntWritable, IntWritable>> helloCnts = ds.
			groupBy(0).
			reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
					new ConfigurableCntReducer(), conf));

	String resultPath = tempFolder.newFile().toURI().toString();

	helloCnts.writeAsText(resultPath);
	env.execute();

	String expected = "(0,0)\n" +
			"(1,0)\n" +
			"(2,1)\n" +
			"(3,1)\n" +
			"(4,1)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 10
Source File: HadoopMapFunctionITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDataDuplicatingMapper() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
	DataSet<Tuple2<IntWritable, Text>> duplicatingFlatMapDs = ds.
			flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new DuplicatingMapper()));

	String resultPath = tempFolder.newFile().toURI().toString();

	duplicatingFlatMapDs.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
	env.execute();

	String expected = "(1,Hi)\n" + "(1,HI)\n" +
			"(2,Hello)\n" + "(2,HELLO)\n" +
			"(3,Hello world)\n" + "(3,HELLO WORLD)\n" +
			"(4,Hello world, how are you?)\n" + "(4,HELLO WORLD, HOW ARE YOU?)\n" +
			"(5,I am fine.)\n" + "(5,I AM FINE.)\n" +
			"(6,Luke Skywalker)\n" + "(6,LUKE SKYWALKER)\n" +
			"(7,Comment#1)\n" + "(7,COMMENT#1)\n" +
			"(8,Comment#2)\n" + "(8,COMMENT#2)\n" +
			"(9,Comment#3)\n" + "(9,COMMENT#3)\n" +
			"(10,Comment#4)\n" + "(10,COMMENT#4)\n" +
			"(11,Comment#5)\n" + "(11,COMMENT#5)\n" +
			"(12,Comment#6)\n" + "(12,COMMENT#6)\n" +
			"(13,Comment#7)\n" + "(13,COMMENT#7)\n" +
			"(14,Comment#8)\n" + "(14,COMMENT#8)\n" +
			"(15,Comment#9)\n" + "(15,COMMENT#9)\n" +
			"(16,Comment#10)\n" + "(16,COMMENT#10)\n" +
			"(17,Comment#11)\n" + "(17,COMMENT#11)\n" +
			"(18,Comment#12)\n" + "(18,COMMENT#12)\n" +
			"(19,Comment#13)\n" + "(19,COMMENT#13)\n" +
			"(20,Comment#14)\n" + "(20,COMMENT#14)\n" +
			"(21,Comment#15)\n" + "(21,COMMENT#15)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 11
Source File: WordCountSubclassInterfacePOJOITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> text = env.readTextFile(textPath);

	DataSet<WCBase> counts = text
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WCBase>() {
				private static final long serialVersionUID = 1L;
				public WCBase reduce(WCBase value1, WCBase value2) {
					WC wc1 = (WC) value1;
					WC wc2 = (WC) value2;
					int c = wc1.secretCount.getCount() + wc2.secretCount.getCount();
					wc1.secretCount.setCount(c);
					return wc1;
				}
			})
			.map(new MapFunction<WCBase, WCBase>() {
				@Override
				public WCBase map(WCBase value) throws Exception {
					WC wc = (WC) value;
					wc.count = wc.secretCount.getCount();
					return wc;
				}
			});

	counts.writeAsText(resultPath);

	env.execute("WordCount with custom data types example");
}
 
Example 12
Source File: HadoopReduceCombineFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testConfigurationViaJobConf() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	JobConf conf = new JobConf();
	conf.set("my.cntPrefix", "Hello");

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
			map(new Mapper4());

	DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
			groupBy(0).
			reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
					new ConfigurableCntReducer(), conf));

	String resultPath = tempFolder.newFile().toURI().toString();

	hellos.writeAsText(resultPath);
	env.execute();

	// expected result
	String expected = "(0,0)\n" +
			"(1,0)\n" +
			"(2,1)\n" +
			"(3,1)\n" +
			"(4,1)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 13
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
/**
 * 
 * <pre>
 *             (SRC A)     
 *             /     \      
 *        (SINK A)    (SINK B)
 * </pre>
 */
@Test
public void testBranchingWithMultipleDataSinksSmall() {
	try {
		String outPath1 = "/tmp/out1";
		String outPath2 = "/tmp/out2";

		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);

		source1.writeAsText(outPath1);
		source1.writeAsText(outPath2);

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		
		// ---------- check the optimizer plan ----------
		
		// number of sinks
		Assert.assertEquals("Wrong number of data sinks.", 2, oPlan.getDataSinks().size());
		
		// sinks contain all sink paths
		Set<String> allSinks = new HashSet<String>();
		allSinks.add(outPath1);
		allSinks.add(outPath2);
		
		for (SinkPlanNode n : oPlan.getDataSinks()) {
			String path = ((TextOutputFormat<String>)n.getSinkNode().getOperator()
					.getFormatWrapper().getUserCodeObject()).getOutputFilePath().toString();
			Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
		}
		
		// ---------- compile plan to job graph to verify that no error is thrown ----------
		
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 14
Source File: Text.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	int dop = Integer.valueOf(args[0]);
	String outPath = args[1];
	long finalSizeGB = Integer.valueOf(args[2]);
	int numberOfFiles = dop;
	if (args.length > 3) {
		numberOfFiles = Integer.valueOf(args[3]);
	}
	final long bytesPerMapper = ((finalSizeGB * 1024 * 1024 * 1024) / numberOfFiles);
	System.err.println("Generating Text data with the following properties:\n"
			+ "dop="+dop+" outPath="+outPath+" finalSizeGB="+finalSizeGB+" bytesPerMapper="+bytesPerMapper+" number of files="+numberOfFiles);

	DataSet<Long> empty = env.generateSequence(1, numberOfFiles);
	DataSet<String> logLine = empty.flatMap(new FlatMapFunction<Long, String>() {
		private static final long serialVersionUID = 1L;
		@Override
		public void flatMap(Long value, Collector<String> out) throws Exception {
			System.err.println("got value="+value);
			Random rnd = new Utils.XORShiftRandom();
			StringBuffer sb = new StringBuffer();
			long bytesGenerated = 0;
			while (true) {
				int sentenceLength = rnd.nextInt(25); // up to 24 words per sentence
				for (int i = 0; i < sentenceLength; i++) {
					sb.append(Utils.getFastZipfRandomWord());
					sb.append(' ');
				}
				sb.append(sentenceEnds[rnd.nextInt(sentenceEnds.length-1)]);
				final String str = sb.toString();
				sb.delete(0, sb.length());
				bytesGenerated += str.length();
				out.collect(str);
				// System.err.println("line ="+str);
				if (bytesGenerated > bytesPerMapper) {
					System.err.println("value="+value+" done with "+bytesGenerated);
					break;
				}
			}
		}
	}).setParallelism(numberOfFiles);
	logLine.writeAsText(outPath, FileSystem.WriteMode.OVERWRITE);
	env.setParallelism(numberOfFiles);
	env.execute("Flink Distributed Text Data Generator");
}
 
Example 15
Source File: LinearRegression.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

	final ParameterTool params = ParameterTool.fromArgs(args);

	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final int iterations = params.getInt("iterations", 10);

	// make parameters available in the web interface
	env.getConfig().setGlobalJobParameters(params);

	// get input x data from elements
	DataSet<Data> data;
	if (params.has("input")) {
		// read data from CSV file
		data = env.readCsvFile(params.get("input"))
				.fieldDelimiter(" ")
				.includeFields(true, true)
				.pojoType(Data.class);
	} else {
		System.out.println("Executing LinearRegression example with default input data set.");
		System.out.println("Use --input to specify file input.");
		data = LinearRegressionData.getDefaultDataDataSet(env);
	}

	// get the parameters from elements
	DataSet<Params> parameters = LinearRegressionData.getDefaultParamsDataSet(env);

	// set number of bulk iterations for SGD linear Regression
	IterativeDataSet<Params> loop = parameters.iterate(iterations);

	DataSet<Params> newParameters = data
			// compute a single step using every sample
			.map(new SubUpdate()).withBroadcastSet(loop, "parameters")
			// sum up all the steps
			.reduce(new UpdateAccumulator())
			// average the steps and update all parameters
			.map(new Update());

	// feed new parameters back into next iteration
	DataSet<Params> result = loop.closeWith(newParameters);

	// emit result
	if (params.has("output")) {
		result.writeAsText(params.get("output"));
		// execute program
		env.execute("Linear Regression example");
	} else {
		System.out.println("Printing result to stdout. Use --output to specify output path.");
		result.print();
	}
}
 
Example 16
Source File: HadoopReduceFunctionITCase.java    From flink with Apache License 2.0
@Test
public void testUngroupedHadoopReducer() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);

	DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
			reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));

	String resultPath = tempFolder.newFile().toURI().toString();

	commentCnts.writeAsText(resultPath);
	env.execute();

	String expected = "(42,15)\n";

	compareResultsByLinesInMemory(expected, resultPath);
}
 
Example 17
Source File: WordCountWithoutCombine.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	
	if(!parseParameters(args)) {
		return;
	}
	
	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	
	// get input data
	DataSet<String> text = env.readTextFile(textPath);
	
	DataSet<Tuple2<String, Integer>> counts = 
			// split up the lines in pairs (2-tuples) containing: (word,1)
			text.flatMap(new Tokenizer()).filter(new FilterFunction<Tuple2<String,Integer>>() {
				@Override
				public boolean filter(Tuple2<String, Integer> value) throws Exception {
					// keep only non-empty words (the word is in field f0)
					return !value.f0.equals("");
				}
			})
			// group by the tuple field "0" and sum up tuple field "1"
			.groupBy(0)
			.reduceGroup(new GroupReduceFunction<Tuple2<String,Integer>, Tuple2<String, Integer>>() {
				@Override
				public void reduce(
						Iterable<Tuple2<String, Integer>> valuesIt,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					Iterator<Tuple2<String, Integer>> values = valuesIt.iterator();
					int count = 0;
					Tuple2<String, Integer> val = null; // this always works because the iterator always has something.
					while(values.hasNext()) {
						val = values.next();
						count += val.f1;
					}
					val.f1 = count;
					out.collect(val);
				}
			});
	
	counts.writeAsText(outputPath);
	// counts.writeAsCsv(outputPath, "\n", " ");
	
	// execute program
	env.execute("WordCountWithoutcombine");
}
 
Example 18
Source File: BulkIterationTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCorrectTranslation() {
	final String jobName = "Test JobName";

	final int numIterations = 13;

	final int defaultParallelism = 133;
	final int iterationParallelism = 77;

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// ------------ construct the test program ------------------

	{
		env.setParallelism(defaultParallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, Long, String>> initialDataSet = env.fromElements(new Tuple3<>(3.44, 5L, "abc"));

		IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration = initialDataSet.iterate(numIterations);
		bulkIteration.setParallelism(iterationParallelism);

		// test that multiple iteration consumers are supported
		DataSet<Tuple3<Double, Long, String>> identity = bulkIteration
			.map(new IdentityMapper<Tuple3<Double, Long, String>>());

		DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity);

		result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
		result.writeAsText("/dev/null");
	}

	Plan p = env.createProgramPlan(jobName);

	// ------------- validate the plan ----------------

	BulkIterationBase<?> iteration = (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput();

	assertEquals(jobName, p.getJobName());
	assertEquals(defaultParallelism, p.getDefaultParallelism());
	assertEquals(iterationParallelism, iteration.getParallelism());
}
 
Example 19
Source File: BulkIterationTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCorrectTranslation() {
	final String jobName = "Test JobName";

	final int numIterations = 13;

	final int defaultParallelism = 133;
	final int iterationParallelism = 77;

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// ------------ construct the test program ------------------

	{
		env.setParallelism(defaultParallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, Long, String>> initialDataSet = env.fromElements(new Tuple3<>(3.44, 5L, "abc"));

		IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration = initialDataSet.iterate(numIterations);
		bulkIteration.setParallelism(iterationParallelism);

		// test that multiple iteration consumers are supported
		DataSet<Tuple3<Double, Long, String>> identity = bulkIteration
			.map(new IdentityMapper<Tuple3<Double, Long, String>>());

		DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity);

		result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
		result.writeAsText("/dev/null");
	}

	Plan p = env.createProgramPlan(jobName);

	// ------------- validate the plan ----------------

	BulkIterationBase<?> iteration = (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput();

	assertEquals(jobName, p.getJobName());
	assertEquals(defaultParallelism, p.getDefaultParallelism());
	assertEquals(iterationParallelism, iteration.getParallelism());
}