Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#fromElements()

The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#fromElements(). Each example names the project and source file it comes from, together with the project's license.
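Before diving into the project examples, here is a minimal, self-contained sketch (written for this page, not taken from any of the projects below) showing the basic call pattern: fromElements() lifts a fixed set of in-memory values into a DataSet, inferring the element type from the arguments.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class FromElementsSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Create a DataSet directly from in-memory values; the element type
		// (here String) is inferred from the arguments.
		DataSet<String> words = env.fromElements("to", "be", "or", "not", "to", "be");

		// collect() triggers execution and returns the elements to the client.
		System.out.println(words.collect());
	}
}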
Example 1
Source File: IterationWithAllReducerITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<String> initialInput = env.fromElements("1", "1", "1", "1", "1", "1", "1", "1");

	IterativeDataSet<String> iteration = initialInput.iterate(5).name("Loop");

	DataSet<String> sumReduce = iteration.reduce(new ReduceFunction<String>(){
		@Override
		public String reduce(String value1, String value2) throws Exception {
			// Keeps the first value; with identical "1" inputs the result is always "1".
			return value1;
		}
	}).name("Compute sum (Reduce)");

	List<String> result = iteration.closeWith(sumReduce).collect();

	compareResultAsText(result, EXPECTED);
}
 
Example 2
Source File: SemanticPropertiesTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testUnaryFunctionMovingForwardedAnnotation() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
	input.map(new ShufflingMapper<Long>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 1);
	FieldSet fw3 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertNotNull(fw3);
	assertTrue(fw1.contains(2));
	assertTrue(fw2.contains(0));
	assertTrue(fw3.contains(1));
}
 
Example 3
Source File: ConnectedComponentsWithRandomisedEdgesITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Long> vertexIds = env.generateSequence(1, NUM_VERTICES);
	DataSet<String> edgeString = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"));

	DataSet<Edge<Long, NullValue>> edges = edgeString.map(new EdgeParser());

	DataSet<Vertex<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

	DataSet<Vertex<Long, Long>> result = graph.run(new ConnectedComponents<>(100));

	result.writeAsCsv(resultPath, "\n", " ");
	env.execute();
}
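A detail worth noting in the example above: fromElements() is a varargs method, so passing the String[] returned by split("\n") produces a DataSet<String> with one element per line. A minimal sketch of the same pattern (hypothetical input, not from the project above):

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
String edgeData = "1 2\n2 3\n3 4"; // hypothetical edge list, one edge per line
DataSet<String> lines = env.fromElements(edgeData.split("\n")); // one element per array entry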
 
Example 4
Source File: Java8WordCount.java    From flink-examples with MIT License
public static void main(String[] args) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<String> lines = env.fromElements(
        "Apache Flink is a community-driven open source framework for distributed big data analytics,",
        "like Hadoop and Spark. The core of Apache Flink is a distributed streaming dataflow engine written",
        " in Java and Scala.[1][2] It aims to bridge the gap between MapReduce-like systems and shared-nothing",
        "parallel database systems. Therefore, Flink executes arbitrary dataflow programs in a data-parallel and",
        "pipelined manner.[3] Flink's pipelined runtime system enables the execution of bulk/batch and stream",
        "processing programs.[4][5] Furthermore, Flink's runtime supports the execution of iterative algorithms natively.[6]"
    );

    lines.flatMap((line, out) -> {
        String[] words = line.split("\\W+");
        for (String word : words) {
            out.collect(new Tuple2<>(word, 1));
        }
    })
    .returns(new TupleTypeInfo<Tuple2<String, Integer>>(TypeInformation.of(String.class), TypeInformation.of(Integer.class)))
    .groupBy(0)
    .sum(1)
    .print();
}
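The .returns(...) call in the example above is needed because Java erases the lambda's generic types: without an explicit hint, Flink cannot infer that the flatMap emits Tuple2<String, Integer>. In Flink versions that provide the Types utility class, an equivalent and arguably more readable hint would be:

.returns(Types.TUPLE(Types.STRING, Types.INT))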
 
Example 5
Source File: SemanticPropertiesTranslationTest.java    From flink with Apache License 2.0
@Test
public void testUnaryFunctionAllForwardedExceptAnnotation() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
	input.map(new AllForwardedExceptMapper<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertTrue(fw1.contains(0));
	assertTrue(fw2.contains(2));
}
 
Example 6
Source File: GraphOperationsITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testNumberOfEdges() throws Exception {
	/*
	 * Test numberOfEdges()
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);
	DataSet<Long> data = env.fromElements(graph.numberOfEdges());

	List<Long> result = data.collect();

	expectedResult = "7";

	compareResultAsText(result, expectedResult);
}
 
Example 7
Source File: BroadcastVariablePipelinebreakerTest.java    From flink with Apache License 2.0
@Test
public void testNoBreakerForIndependentVariable() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<String> source1 = env.fromElements("test");
		DataSet<String> source2 = env.fromElements("test");
		
		source1.map(new IdentityMapper<String>()).withBroadcastSet(source2, "some name")
				.output(new DiscardingOutputFormat<String>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
		
		assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
		assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());
		
		assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode());
		assertEquals(DataExchangeMode.PIPELINED, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
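For readers unfamiliar with the broadcast mechanism this test exercises, here is a minimal sketch of how a function consumes a broadcast set at runtime (illustrative names, standard DataSet API):

DataSet<String> data = env.fromElements("test");
DataSet<String> broadcast = env.fromElements("a", "b");

data.map(new RichMapFunction<String, String>() {
	@Override
	public String map(String value) throws Exception {
		// Read the broadcast set registered under "some name".
		List<String> bc = getRuntimeContext().getBroadcastVariable("some name");
		return value + "/" + bc.size();
	}
}).withBroadcastSet(broadcast, "some name");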
 
Example 8
Source File: GroupReduceITCase.java    From flink with Apache License 2.0
/**
 * Regression test for FLINK-2019: a Joda-Time {@code DateTime} field is serialized with Kryo.
 */
@Test
public void testJodatimeDateTimeWithKryo() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple2<Integer, DateTime>> ds = env.fromElements(new Tuple2<>(1, DateTime.now()));
	DataSet<Tuple2<Integer, DateTime>> reduceDs = ds.groupBy("f1").sum(0).project(0);

	List<Tuple2<Integer, DateTime>> result = reduceDs.collect();

	String expected = "1\n";

	compareResultAsTuples(result, expected);
}
 
Example 9
Source File: StaticData.java    From flink with Apache License 2.0
public static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
	return env.fromElements(
		"To be, or not to be,--that is the question:--",
		"Whether 'tis nobler in the mind to suffer",
		"The slings and arrows of outrageous fortune",
		"Or to take arms against a sea of troubles,"
	);
}
 
Example 10
Source File: ValueStateTransformationTest.java    From bravo with Apache License 2.0
private Path transformLastSavepoint() throws Exception {
	ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment();
	Savepoint savepoint = getLastSavepoint();
	OperatorStateReader reader = new OperatorStateReader(environment, savepoint, "hello");

	DataSet<Tuple2<Integer, Integer>> countState = reader.readKeyedStates(
			KeyedStateReader.forValueStateKVPairs("Count", new TypeHint<Tuple2<Integer, Integer>>() {}));

	DataSet<Tuple2<Integer, Integer>> newCountsToAdd = environment
			.fromElements(Tuple2.of(0, 100), Tuple2.of(3, 1000), Tuple2.of(1, 100), Tuple2.of(2, 1000));

	DataSet<Tuple2<Integer, Integer>> newStates = countState.join(newCountsToAdd).where(0).equalTo(0)
			.map(new SumValues());

	Path newCheckpointBasePath = new Path(getCheckpointDir(), "new");
	OperatorStateWriter operatorStateWriter = new OperatorStateWriter(savepoint, "hello", newCheckpointBasePath);

	operatorStateWriter.addValueState("Count",
			countState.map(t -> Tuple2.of(t.f0, t.f1 * 2)).returns(new TypeHint<Tuple2<Integer, Integer>>() {}));

	operatorStateWriter.createNewValueState("Count2", newStates, IntSerializer.INSTANCE);
	operatorStateWriter.addKeyedStateRows(reader.getAllUnreadKeyedStateRows());

	OperatorState newOpState = operatorStateWriter.writeAll();
	Savepoint newSavepoint = StateMetadataUtils.createNewSavepoint(savepoint, newOpState);
	StateMetadataUtils.writeSavepointMetadata(newCheckpointBasePath, newSavepoint);
	return newCheckpointBasePath;
}
 
Example 11
Source File: ConsumePipelinedAndBlockingResultITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple1<Long>> pipelinedSource = env.fromElements(new Tuple1<Long>(1L));

	DataSet<Tuple1<Long>> slowBlockingSource = env.generateSequence(0, 10).map(
			new MapFunction<Long, Tuple1<Long>>() {
				@Override
				public Tuple1<Long> map(Long value) throws Exception {
					Thread.sleep(200);

					return new Tuple1<Long>(value);
				}
			}
	);

	slowBlockingSource.join(slowBlockingSource)
			.where(0).equalTo(0).output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	// Join the slow blocking and the pipelined source. This verifies that the join works
	// without problems and that the blocking result is not requested too early.
	pipelinedSource.join(slowBlockingSource)
			.where(0).equalTo(0)
			.output(new DiscardingOutputFormat<Tuple2<Tuple1<Long>, Tuple1<Long>>>());

	env.execute("Consume one pipelined and one blocking result test job");
}
 
Example 12
Source File: AggregateTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void translateAggregate() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, StringValue, Long>> initialData =
				env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

		initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();

		// check keys
		assertEquals(1, reducer.getKeyColumns(0).length);
		assertEquals(0, reducer.getKeyColumns(0)[0]);

		assertEquals(-1, reducer.getParallelism());
		assertTrue(reducer.isCombinable());

		assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}
 
Example 13
Source File: DataSetConversionUtilTest.java    From Alink with Apache License 2.0
@Test
public void testBasicConvert() throws Exception {
	ExecutionEnvironment env = MLEnvironmentFactory.getDefault().getExecutionEnvironment();

	DataSet <Row> input = env.fromElements(Row.of("a"));

	Table table1 = DataSetConversionUtil.toTable(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, input, new String[] {"word"});
	Assert.assertEquals(
		new TableSchema(new String[] {"word"}, new TypeInformation[] {TypeInformation.of(String.class)}),
		table1.getSchema()
	);
	List <Row> list = DataSetConversionUtil.fromTable(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID, table1).collect();
	Assert.assertEquals(Collections.singletonList(Row.of("a")), list);
}
 
Example 14
Source File: JoinOperatorTest.java    From flink with Apache License 2.0
@Test(expected = InvalidProgramException.class)
public void testJoinKeyInvalidAtomic2() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo);
	DataSet<Integer> ds2 = env.fromElements(0, 0, 0);

	ds1.join(ds2).where(0).equalTo("*", "invalidKey");
}
 
Example 15
Source File: PartitionOperatorTest.java    From flink with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testRangePartitionWithEmptyIndicesKey() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(new int[]{});
}
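The test above expects an IllegalArgumentException because the key array is empty; a valid call supplies at least one field position, for example ds.partitionByRange(0).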
 
Example 16
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
@Test
public void testTwoIterationsDirectlyChained() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);
		
		DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
		
		DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);
		
		depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
		
		WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
		BulkIterationPlanNode bipn = (BulkIterationPlanNode)wipn.getInput1().getSource();

		// the hash partitioning has been pushed out of the delta iteration into the bulk iteration
		assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

		// the input of the root of the step function is the last operator of the step function;
		// since the partitioning work has been pushed into the bulk iteration, that operator
		// has to guarantee the hash partitioning
		for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
		assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
		
		assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
		assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 17
Source File: PregelCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
	final String broadcastSetName = "broadcast";

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	// compose test program
	{
		DataSet<Long> bcVar = env.fromElements(1L);

		DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(
			new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
			.map(new Tuple2ToVertexMap<>());

		DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L))
			.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

				public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
					return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
				}
			});

		Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

		VertexCentricConfiguration parameters = new VertexCentricConfiguration();
		parameters.addBroadcastSet(broadcastSetName, bcVar);

		DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(
			new CCCompute(), null, 100, parameters)
			.getVertices();

		result.output(new DiscardingOutputFormat<>());
	}

	Plan p = env.createProgramPlan("Pregel Connected Components");
	OptimizedPlan op = compileNoStats(p);

	// check the sink
	SinkPlanNode sink = op.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

	// check the iteration
	WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

	// check the solution set delta
	PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
	assertTrue(ssDelta instanceof SingleInputPlanNode);

	SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());

	// check the computation coGroup
	DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
	assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
	assertTrue(computationCoGroup.getInput2().getTempMode().isCached());

	assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());

	// check that the initial partitioning is pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
 
Example 18
Source File: HBaseWriteExample.java    From flink with Apache License 2.0
private static DataSet<String> getDefaultTextLineDataSet(ExecutionEnvironment env) {
	return env.fromElements(WORDS);
}