Java Code Examples for org.apache.flink.api.java.io.DiscardingOutputFormat

The following examples show how to use org.apache.flink.api.java.io.DiscardingOutputFormat, an OutputFormat that discards every record written to it. It is typically used in tests and benchmarks, where a program needs a data sink but the emitted records themselves are irrelevant. These examples are extracted from open source projects.
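Before the project examples, here is a minimal, self-contained sketch of the pattern they all share (an illustration only, not taken from any of the projects below; the class name, sequence bounds, and doubling mapper are made up for demonstration):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class DiscardingSinkExample {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Every DataSet program needs at least one sink before execute() is called.
		// DiscardingOutputFormat satisfies that requirement while dropping every
		// record it receives, so only the upstream operators do observable work.
		DataSet<Long> data = env.generateSequence(1, 100);

		data.map(new MapFunction<Long, Long>() {
			@Override
			public Long map(Long value) {
				return value * 2;
			}
		})
		.output(new DiscardingOutputFormat<Long>());

		env.execute();
	}
}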
Example 1
Source Project: Flink-CEPplus   Source File: JoinCancelingITCase.java    License: Apache License 2.0
private void executeTaskWithGenerator(
		JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner,
		int keys, int vals, int msecsTillCanceling, int maxTimeTillCanceled) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple2<Integer, Integer>> input1 = env.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));
	DataSet<Tuple2<Integer, Integer>> input2 = env.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));

	input1.join(input2, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
			.where(0)
			.equalTo(0)
			.with(joiner)
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

	env.setParallelism(PARALLELISM);

	runAndCancelJob(env.createProgramPlan(), msecsTillCanceling, maxTimeTillCanceled);
}
 
Example 2
Source Project: Flink-CEPplus   Source File: DegreesWithExceptionITCase.java    License: Apache License 2.0
/**
 * Test outDegrees() with an edge having a srcId that does not exist in the vertex DataSet.
 */
@Test
public void testOutDegreesInvalidEdgeSrcId() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
			TestGraphUtils.getLongLongEdgeInvalidSrcData(env), env);

	try {
		graph.outDegrees().output(new DiscardingOutputFormat<>());
		env.execute();

		fail("graph.outDegrees() did not fail.");
	} catch (Exception e) {
		// We expect the job to fail with an exception
	}
}
 
Example 3
Source Project: flink   Source File: DegreesWithExceptionITCase.java    License: Apache License 2.0
/**
 * Test outDegrees() with an edge having a srcId that does not exist in the vertex DataSet.
 */
@Test
public void testOutDegreesInvalidEdgeSrcId() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
			TestGraphUtils.getLongLongEdgeInvalidSrcData(env), env);

	try {
		graph.outDegrees().output(new DiscardingOutputFormat<>());
		env.execute();

		fail("graph.outDegrees() did not fail.");
	} catch (Exception e) {
		// We expect the job to fail with an exception
	}
}
 
Example 4
Source Project: Flink-CEPplus   Source File: CustomInputSplitProgram.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().disableSysoutLogging();

		DataSet<Integer> data = env.createInput(new CustomInputFormat());

		data
			.map(new MapFunction<Integer, Tuple2<Integer, Double>>() {
				@Override
				public Tuple2<Integer, Double> map(Integer value) {
					return new Tuple2<Integer, Double>(value, value * 0.5);
				}
			})
			.output(new DiscardingOutputFormat<Tuple2<Integer, Double>>());

		env.execute();
	}
 
Example 5
Source Project: flink   Source File: ReduceAllTest.java    License: Apache License 2.0
@Test
public void testReduce() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();

	try {
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The pact compiler is unable to compile this plan correctly");
	}
}
 
Example 6
Source Project: Flink-CEPplus   Source File: HardPlansCompilationTest.java    License: Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 * 
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.map(new IdentityMapper<Long>()).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
			.cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = compileNoStats(plan);

	JobGraphGenerator jobGen = new JobGraphGenerator();
	jobGen.compileJobGraph(oPlan);
}
 
Example 7
Source Project: flink   Source File: DegreesWithExceptionITCase.java    License: Apache License 2.0
/**
 * Test inDegrees() with an edge having a trgId that does not exist in the vertex DataSet.
 */
@Test
public void testInDegreesInvalidEdgeTrgId() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
			TestGraphUtils.getLongLongEdgeInvalidTrgData(env), env);

	try {
		graph.inDegrees().output(new DiscardingOutputFormat<>());
		env.execute();

		fail("graph.inDegrees() did not fail.");
	} catch (Exception e) {
		// We expect the job to fail with an exception
	}
}
 
Example 8
Source Project: flink   Source File: JoinCancelingITCase.java    License: Apache License 2.0
@Test
public void testCancelSortMatchWhileDoingHeavySorting() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	HeavyCompareGeneratorInputFormat input = new HeavyCompareGeneratorInputFormat(100);
	DataSet<Tuple2<HeavyCompare, Integer>> input1 = env.createInput(input);
	DataSet<Tuple2<HeavyCompare, Integer>> input2 = env.createInput(input);

	input1.join(input2, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
			.where(0)
			.equalTo(0)
			.with(new JoinFunction<Tuple2<HeavyCompare, Integer>, Tuple2<HeavyCompare, Integer>, Tuple2<HeavyCompare, Integer>>() {
				@Override
				public Tuple2<HeavyCompare, Integer> join(
					Tuple2<HeavyCompare, Integer> first,
					Tuple2<HeavyCompare, Integer> second) throws Exception {
					throw new Exception("Job should be canceled in sort-merge phase, never run here ...");
				}
			})
			.output(new DiscardingOutputFormat<Tuple2<HeavyCompare, Integer>>());

	runAndCancelJob(env.createProgramPlan(), 30 * 1000, 60 * 1000);
}
 
Example 9
/**
 * Test groupReduceOnNeighbors() with a NeighborsFunctionWithVertexValue
 * and an edge having a trgId that does not exist in the vertex DataSet.
 */
@Test
public void testGroupReduceOnNeighborsWithVVInvalidEdgeTrgId() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
			TestGraphUtils.getLongLongEdgeInvalidTrgData(env), env);

	try {
		DataSet<Tuple2<Long, Long>> verticesWithSumOfOutNeighborValues =
				graph.groupReduceOnNeighbors(new SumAllNeighbors(), EdgeDirection.ALL);

		verticesWithSumOfOutNeighborValues.output(new DiscardingOutputFormat<>());
		env.execute();

		fail("Expected an exception.");
	} catch (Exception e) {
		// We expect the job to fail with an exception
	}
}
 
Example 10
@Test
public void testUnaryFunctionWildcardForwardedAnnotation() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(new Tuple3<Long, String, Integer>(3L, "test", 42));
	input.map(new WildcardForwardedMapper<Tuple3<Long, String, Integer>>()).output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 1);
	FieldSet fw3 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertNotNull(fw3);
	assertTrue(fw1.contains(0));
	assertTrue(fw2.contains(1));
	assertTrue(fw3.contains(2));
}
 
Example 11
@Test
public void testUnaryFunctionForwardedInLine2() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
	input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2")
			.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertTrue(fw1.contains(1));
	assertTrue(fw2.contains(2));
}
 
Example 12
Source Project: flink   Source File: BranchingPlansCompilerTest.java    License: Apache License 2.0
@Test
public void testBranchesOnlyInBCVariables1() {
	try{
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(100);

		DataSet<Long> input = env.generateSequence(1, 10);
		DataSet<Long> bc_input = env.generateSequence(1, 10);
		
		input
			.map(new IdentityMapper<Long>()).withBroadcastSet(bc_input, "name1")
			.map(new IdentityMapper<Long>()).withBroadcastSet(bc_input, "name2")
			.output(new DiscardingOutputFormat<Long>());
		
		Plan plan = env.createProgramPlan();
		compileNoStats(plan);
	}
	catch(Exception e){
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 13
/**
 * Test groupReduceOnNeighbors() with a NeighborsFunctionWithVertexValue
 * and an edge having a srcId that does not exist in the vertex DataSet.
 */
@Test
public void testGroupReduceOnNeighborsWithVVInvalidEdgeSrcId() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);
	env.getConfig().disableSysoutLogging();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
			TestGraphUtils.getLongLongEdgeInvalidSrcData(env), env);

	try {
		DataSet<Tuple2<Long, Long>> verticesWithSumOfOutNeighborValues =
				graph.groupReduceOnNeighbors(new SumAllNeighbors(), EdgeDirection.ALL);

		verticesWithSumOfOutNeighborValues.output(new DiscardingOutputFormat<>());
		env.execute();

		fail("Expected an exception.");
	} catch (Exception e) {
		// We expect the job to fail with an exception
	}
}
 
Example 14
Source Project: flink   Source File: SemanticPropertiesTranslationTest.java    License: Apache License 2.0
@Test
public void testUnaryFunctionAllForwardedExceptAnnotation() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
	input.map(new AllForwardedExceptMapper<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertTrue(fw1.contains(0));
	assertTrue(fw2.contains(2));
}
 
Example 15
@Test
public void testAccumulator() {
	try {
		final int numElements = 100;

		ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();

		env.generateSequence(1, numElements)
			.map(new CountingMapper())
			.output(new DiscardingOutputFormat<Long>());

		JobExecutionResult result = env.execute();

		assertTrue(result.getNetRuntime() >= 0);

		assertEquals(numElements, (int) result.getAccumulatorResult(ACCUMULATOR_NAME));
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 16
Source Project: flink   Source File: AccumulatorErrorITCase.java    License: Apache License 2.0
@Test
public void testInvalidTypeAccumulator() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env.getConfig().disableSysoutLogging();

	// Test Exception forwarding with faulty Accumulator implementation
	env.generateSequence(0, 10000)
		.map(new IncompatibleAccumulatorTypesMapper())
		.map(new IncompatibleAccumulatorTypesMapper2())
		.output(new DiscardingOutputFormat<>());

	try {
		env.execute();
		fail("Should have failed.");
	} catch (JobExecutionException e) {
		assertTrue("Root cause should be:",
				e.getCause() instanceof Exception);
		assertTrue("Root cause should be:",
				e.getCause().getCause() instanceof UnsupportedOperationException);
	}
}
 
Example 17
Source Project: Flink-CEPplus   Source File: DisjointDataFlowsTest.java    License: Apache License 2.0
@Test
public void testDisjointFlows() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		// generate two different flows
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 18
Source Project: flink   Source File: IterationCompilerTest.java    License: Apache License 2.0
@Test
public void testIdentityIteration() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);
		
		IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
		iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 19
Source Project: flink   Source File: JobGraphGeneratorTest.java    License: Apache License 2.0
@Test
public void testGeneratingJobGraphWithUnconsumedResultPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Long, Long>> input = env.fromElements(new Tuple2<>(1L, 2L))
		.setParallelism(1);

	DataSet<Tuple2<Long, Long>> ds = input.map(new IdentityMapper<>())
		.setParallelism(3);

	AbstractID intermediateDataSetID = new AbstractID();

	// this output branch will be excluded.
	ds.output(BlockingShuffleOutputFormat.createOutputFormat(intermediateDataSetID))
		.setParallelism(1);

	// this is the normal output branch.
	ds.output(new DiscardingOutputFormat<>())
		.setParallelism(1);

	JobGraph jobGraph = compileJob(env);

	Assert.assertEquals(3, jobGraph.getVerticesSortedTopologicallyFromSources().size());

	JobVertex mapVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
	Assert.assertThat(mapVertex, Matchers.instanceOf(JobVertex.class));

	// there are 2 output results, one of which is of type ResultPartitionType.BLOCKING_PERSISTENT
	Assert.assertEquals(2, mapVertex.getProducedDataSets().size());

	Assert.assertTrue(mapVertex.getProducedDataSets().stream()
		.anyMatch(dataSet -> dataSet.getId().equals(new IntermediateDataSetID(intermediateDataSetID)) &&
			dataSet.getResultType() == ResultPartitionType.BLOCKING_PERSISTENT));
}
 
Example 20
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	// read vertex and edge data
	DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
			.rebalance();

	DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
			.rebalance()
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
			.map(new ConnectedComponents.DuplicateValue<>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor,
	// update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
			.where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())

			.groupBy(0).aggregate(Aggregations.MIN, 1)

			.join(iteration.getSolutionSet())
			.where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.output(new DiscardingOutputFormat<>());

	env.execute();
}
 
Example 21
Source Project: flink   Source File: JoinCancelingITCase.java    License: Apache License 2.0
private void executeTask(JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner, boolean slow, int parallelism) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple2<Integer, Integer>> input1 = env.createInput(new InfiniteIntegerTupleInputFormat(slow));
	DataSet<Tuple2<Integer, Integer>> input2 = env.createInput(new InfiniteIntegerTupleInputFormat(slow));

	input1.join(input2, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
			.where(0)
			.equalTo(0)
			.with(joiner)
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

	env.setParallelism(parallelism);

	runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
 
Example 22
@Test
public void testCustomPartitioningKeySelectorGroupReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
				.rebalance().setParallelism(4);
		
		data.groupBy(new TestKeySelector<Tuple2<Integer,Integer>>())
			.withPartitioner(new TestPartitionerInt())
				.reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Integer,Integer>>())
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 23
Source Project: Flink-CEPplus   Source File: AccumulatorErrorITCase.java    License: Apache License 2.0
@Test
public void testFaultyAccumulator() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	// Test Exception forwarding with faulty Accumulator implementation
	env.generateSequence(0, 10000)
		.map(new FaultyAccumulatorUsingMapper())
		.output(new DiscardingOutputFormat<>());

	assertAccumulatorsShouldFail(env.execute());
}
 
Example 24
Source Project: flink   Source File: DistinctTranslationTest.java    License: Apache License 2.0
@Test
public void translateDistinctPlain() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

		initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		// currently distinct is translated to a Reduce
		ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

		// check types
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

		// check keys
		assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0));

		// parallelism was not configured on the operator
		assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

		assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}
 
Example 25
Source Project: flink   Source File: AdditionalOperatorsTest.java    License: Apache License 2.0
@Test
public void testCrossWithSmall() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);
	DataSet<Long> set2 = env.generateSequence(0,1);

	set1.crossWithTiny(set2).name("Cross")
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	try {
		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileWithStats(plan);
		OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);
		
		DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
		Channel in1 = crossPlanNode.getInput1();
		Channel in2 = crossPlanNode.getInput2();
		
		assertEquals(ShipStrategyType.FORWARD, in1.getShipStrategy());
		assertEquals(ShipStrategyType.BROADCAST, in2.getShipStrategy());
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The Flink optimizer is unable to compile this plan correctly.");
	}
}
 
Example 26
Source Project: Flink-CEPplus   Source File: UserCodeType.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	DataSet<Integer> input = env.fromElements(1, 2, 3, 4, 5);

	DataSet<CustomType> customTypes = input.map(new MapFunction<Integer, CustomType>() {
		private static final long serialVersionUID = -5878758010124912128L;

		@Override
		public CustomType map(Integer integer) throws Exception {
			return new CustomType(integer);
		}
	}).rebalance();

	DataSet<Integer> result = customTypes.map(new MapFunction<CustomType, Integer>() {
		private static final long serialVersionUID = -7950126399899584991L;

		@Override
		public Integer map(CustomType value) throws Exception {
			return value.value;
		}
	});

	result.output(new DiscardingOutputFormat<Integer>());

	env.execute();
}
 
Example 27
Source Project: flink   Source File: PropertyDataSourceTest.java    License: Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource5() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f2")
			.splitsGroupedBy("f2");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 28
Source Project: Flink-CEPplus   Source File: IterationsCompilerTest.java    License: Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);

		DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

		DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

		// Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper
		// to do the hash partitioning between the partial solution node and the join node
		// instead of pushing the partitioning out
		doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);

		BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

		// check that work has not been pushed out
		for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 29
Source Project: Flink-CEPplus   Source File: GroupCombineITCase.java    License: Apache License 2.0
/**
 * Checks that all API methods are callable.
 */
@Test
public void testAPI() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple1<String>> ds = CollectionDataSets.getStringDataSet(env).map(new MapFunction<String, Tuple1<String>>() {
		@Override
		public Tuple1<String> map(String value) throws Exception {
			return new Tuple1<>(value);
		}
	});

	// all methods on DataSet
	ds.combineGroup(new GroupCombineFunctionExample())
	.output(new DiscardingOutputFormat<Tuple1<String>>());

	// all methods on UnsortedGrouping
	ds.groupBy(0).combineGroup(new GroupCombineFunctionExample())
	.output(new DiscardingOutputFormat<Tuple1<String>>());

	// all methods on SortedGrouping
	ds.groupBy(0).sortGroup(0, Order.ASCENDING).combineGroup(new GroupCombineFunctionExample())
	.output(new DiscardingOutputFormat<Tuple1<String>>());

	env.execute();
}
 
Example 30
Source Project: Flink-CEPplus   Source File: PipelineBreakerTest.java    License: Apache License 2.0
@Test
public void testPipelineBreakerWithBroadcastVariable() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
		env.setParallelism(64);
		
		DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
		
		DataSet<Long> result = source.map(new IdentityMapper<Long>())
									.map(new IdentityMapper<Long>())
										.withBroadcastSet(source, "bc");
		
		result.output(new DiscardingOutputFormat<Long>());
		
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode mapperInput = (SingleInputPlanNode) mapper.getInput().getSource();
		
		assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
		assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());
		
		assertEquals(DataExchangeMode.BATCH, mapperInput.getInput().getDataExchangeMode());
		assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
		
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}