Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#iterate()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#iterate(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a small iterative job (source, iteration, map, filter, feedback) and verifies the
 * slot sharing group and the co-location group of every iteration source/sink pair found
 * in the generated stream graph.
 */
@Test
public void testIteration() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// Open the loop directly on the named source.
	final IterativeStream<Integer> loop = environment.fromElements(1, 2, 3).name("source").iterate(3000);
	loop.name("iteration").setParallelism(2);

	// Loop body: increment every element, then a filter that rejects everything.
	final DataStream<Integer> feedback = loop
			.map(value -> value + 1).name("map").setParallelism(2)
			.filter(value -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	final StreamGraph graph = environment.getStreamGraph();
	for (Tuple2<StreamNode, StreamNode> pair : graph.getIterationSourceSinkPairs()) {
		final StreamNode loopSource = pair.f0;
		final StreamNode loopSink = pair.f1;

		// Both ends of the loop must carry the same, non-null co-location group.
		assertNotNull(loopSource.getCoLocationGroup());
		assertEquals(loopSource.getCoLocationGroup(), loopSink.getCoLocationGroup());

		// Both ends must also live in the default slot sharing group.
		assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, loopSource.getSlotSharingGroup());
		assertEquals(loopSource.getSlotSharingGroup(), loopSink.getSlotSharingGroup());
	}
}
 
Example 2
Source File: IterativeConnectedComponents.java    From gelly-streaming with Apache License 2.0 6 votes vote down vote up
/**
 * Entry point: wires an iterative streaming job over an edge stream and runs it under the
 * job name "Streaming Connected Components".
 *
 * @param args command-line arguments, handed to {@code parseParameters}
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

	// Nothing to do when the arguments could not be parsed
	final boolean parametersAccepted = parseParameters(args);
	if (!parametersAccepted) {
		return;
	}

	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Tuple2<Long, Long>> edgeStream = getEdgesDataSet(environment);

	// The loop body keys the stream by field 0 and applies AssignComponents;
	// its output is fed back into the iteration.
	final IterativeStream<Tuple2<Long, Long>> loop = edgeStream.iterate();
	final DataStream<Tuple2<Long, Long>> components = loop.closeWith(
			loop.keyBy(0).flatMap(new AssignComponents()));

	// Emit the results
	components.print();

	environment.execute("Streaming Connected Components");
}
 
Example 3
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a small iterative job, attaches a resource spec to the iteration transformation and
 * verifies, for every iteration source/sink pair of the stream graph, the co-location group,
 * the slot sharing group and the merged minimum resources of the pair.
 */
@Test
public void testIteration() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// Open the loop directly on the named source.
	final IterativeStream<Integer> loop = environment.fromElements(1, 2, 3).name("source").iterate(3000);
	loop.name("iteration").setParallelism(2);

	// Loop body: increment every element, then a filter that rejects everything.
	final DataStream<Integer> feedback = loop
			.map(value -> value + 1).name("map").setParallelism(2)
			.filter(value -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	// The same spec is passed for both arguments of setResources.
	final ResourceSpec expectedResources = ResourceSpec.newBuilder(1.0, 100).build();
	loop.getTransformation().setResources(expectedResources, expectedResources);

	final StreamGraph graph = environment.getStreamGraph();
	for (Tuple2<StreamNode, StreamNode> pair : graph.getIterationSourceSinkPairs()) {
		final StreamNode loopSource = pair.f0;
		final StreamNode loopSink = pair.f1;

		// Both ends of the loop must carry the same, non-null co-location group.
		assertNotNull(loopSource.getCoLocationGroup());
		assertEquals(loopSource.getCoLocationGroup(), loopSink.getCoLocationGroup());

		// Both ends must also live in the default slot sharing group.
		assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, loopSource.getSlotSharingGroup());
		assertEquals(loopSource.getSlotSharingGroup(), loopSink.getSlotSharingGroup());

		// Source and sink minimum resources, merged, must equal the spec set on the iteration.
		final ResourceSpec mergedResources = loopSource.getMinResources().merge(loopSink.getMinResources());
		assertThat(mergedResources, equalsResourceSpec(expectedResources));
	}
}
 
Example 4
Source File: IterateITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Closes both an iteration and the co-iteration derived from it via {@code withFeedbackType},
 * then checks that the stream graph contains two iteration source/sink pairs and that, for
 * each pair, the vertex fed by the iteration source is the vertex feeding the iteration sink.
 */
@Test
public void testImmutabilityWithCoiteration() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// the extra map is there for rebalance
	final DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	final IterativeStream<Integer> plainLoop = input.iterate();
	// withFeedbackType is expected to create a new iteration rather than modify plainLoop
	final ConnectedIterativeStreams<Integer, String> coLoop = plainLoop.withFeedbackType(String.class);

	plainLoop.closeWith(plainLoop.map(noOpIntMap)).print();
	coLoop.closeWith(coLoop.map(noOpCoMap)).print();

	final StreamGraph streamGraph = environment.getStreamGraph();

	assertEquals(2, streamGraph.getIterationSourceSinkPairs().size());

	for (Tuple2<StreamNode, StreamNode> pair : streamGraph.getIterationSourceSinkPairs()) {
		final StreamNode fedBySource = streamGraph.getTargetVertex(pair.f0.getOutEdges().get(0));
		final StreamNode feedingSink = streamGraph.getSourceVertex(pair.f1.getInEdges().get(0));
		assertEquals(fedBySource, feedingSink);
	}
}
 
Example 5
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a simple iterative job and asserts that every slot of {@code iterated} has been set.
 * The job is attempted up to five times; after each failed attempt the factor applied to the
 * {@code iterate} argument is doubled, and the failure of the last attempt is rethrown.
 *
 * @throws Exception the failure of the final attempt
 */
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	final int maxAttempts = 5;
	int scale = 1;
	int attempt = 0;

	while (attempt < maxAttempts) {
		try {
			final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
			iterated = new boolean[parallelism];

			final DataStream<Boolean> input = environment
					.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap)
					.name("ParallelizeMap");

			final IterativeStream<Boolean> loop = input.iterate(3000 * scale);

			final DataStream<Boolean> feedback = loop.flatMap(new IterationHead()).map(noOpBoolMap);

			// a second consumer of the loop that is not part of the feedback path
			loop.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink());

			environment.execute();

			for (boolean flag : iterated) {
				assertTrue(flag);
			}

			break; // success
		} catch (Throwable failure) {
			LOG.info("Run " + (attempt + 1) + "/" + maxAttempts + " failed", failure);

			// out of retries: surface the last failure to the test runner
			if (attempt >= maxAttempts - 1) {
				throw failure;
			}
			scale *= 2;
		}
		attempt++;
	}
}
 
Example 6
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Opens an iteration directly on the {@code fromElements} source, closes it with a mapped
 * stream and expects an {@link UnsupportedOperationException}.
 * NOTE(review): judging by the test name the failure is presumably caused by a parallelism
 * mismatch between the source and the feedback stream — confirm.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// no intermediate mapper: the loop starts on the source itself
	final IterativeStream<Integer> loop = environment.fromElements(1, 10).iterate();

	loop.closeWith(loop.map(noOpIntMap)).print();
}
 
Example 7
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens an iteration directly on the {@code fromElements} source, closes it with a mapped
 * stream and expects an {@link UnsupportedOperationException}.
 * NOTE(review): judging by the test name the failure is presumably caused by a parallelism
 * mismatch between the source and the feedback stream — confirm.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// no intermediate mapper: the loop starts on the source itself
	final IterativeStream<Integer> loop = environment.fromElements(1, 10).iterate();

	loop.closeWith(loop.map(noOpIntMap)).print();
}
 
Example 8
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens an iteration directly on the {@code fromElements} source, closes it with a mapped
 * stream and expects an {@link UnsupportedOperationException}.
 * NOTE(review): judging by the test name the failure is presumably caused by a parallelism
 * mismatch between the source and the feedback stream — confirm.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// no intermediate mapper: the loop starts on the source itself
	final IterativeStream<Integer> loop = environment.fromElements(1, 10).iterate();

	loop.closeWith(loop.map(noOpIntMap)).print();
}
 
Example 9
Source File: IterateITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a simple iterative job and asserts that every slot of {@code iterated} has been set.
 * The job is attempted up to five times; after each failed attempt the factor applied to the
 * {@code iterate} argument is doubled, and the failure of the last attempt is rethrown.
 *
 * @throws Exception the failure of the final attempt
 */
@SuppressWarnings("rawtypes")
@Test
public void testSimpleIteration() throws Exception {
	final int maxAttempts = 5;
	int scale = 1;
	int attempt = 0;

	while (attempt < maxAttempts) {
		try {
			final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
			iterated = new boolean[parallelism];

			final DataStream<Boolean> input = environment
					.fromCollection(Collections.nCopies(parallelism * 2, false))
					.map(noOpBoolMap)
					.name("ParallelizeMap");

			final IterativeStream<Boolean> loop = input.iterate(3000 * scale);

			final DataStream<Boolean> feedback = loop.flatMap(new IterationHead()).map(noOpBoolMap);

			// a second consumer of the loop that is not part of the feedback path
			loop.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink());

			loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink());

			environment.execute();

			for (boolean flag : iterated) {
				assertTrue(flag);
			}

			break; // success
		} catch (Throwable failure) {
			LOG.info("Run " + (attempt + 1) + "/" + maxAttempts + " failed", failure);

			// out of retries: surface the last failure to the test runner
			if (attempt >= maxAttempts - 1) {
				throw failure;
			}
			scale *= 2;
		}
		attempt++;
	}
}
 
Example 10
Source File: IterateITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a co-iteration cannot be closed with a DataStream that does not have that
 * iteration among its predecessors; an {@link UnsupportedOperationException} is expected.
 */
@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// dummy mapper so that the input ends up with the correct parallelism
	final DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	// two independent iterations opened on the same input
	final IterativeStream<Integer> unrelatedLoop = input.iterate();
	final ConnectedIterativeStreams<Integer, Integer> coLoop =
			input.iterate().withFeedbackType(Integer.class);

	// the feedback stems from the other loop, not from coLoop
	final DataStream<Integer> foreignFeedback = unrelatedLoop.map(noOpIntMap);
	coLoop.closeWith(foreignFeedback);
}
 
Example 11
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a small iterative job, turns it into a job graph and verifies that all vertices share
 * one slot sharing group, that the iteration source and sink vertices share one co-location
 * group, and that every other vertex has no co-location group.
 */
@Test
public void testIteration() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// Open the loop directly on the named source.
	final IterativeStream<Integer> loop = environment.fromElements(1, 2, 3).name("source").iterate(3000);
	loop.name("iteration").setParallelism(2);

	// Loop body: increment every element, then a filter that rejects everything.
	final DataStream<Integer> feedback = loop
			.map(value -> value + 1).name("map").setParallelism(2)
			.filter(value -> false).name("filter").setParallelism(2);
	loop.closeWith(feedback).print();

	final JobGraph graph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());

	// the group of the first vertex serves as the reference for all others
	final SlotSharingGroup referenceGroup = graph.getVerticesAsArray()[0].getSlotSharingGroup();
	assertNotNull(referenceGroup);

	CoLocationGroup sourceCoLocation = null;
	CoLocationGroup sinkCoLocation = null;

	for (JobVertex vertex : graph.getVertices()) {
		// by default every vertex is in the same slot sharing group
		assertEquals(referenceGroup, vertex.getSlotSharingGroup());

		final String vertexName = vertex.getName();
		final boolean isLoopSource = vertexName.startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX);
		final boolean isLoopSink = !isLoopSource
				&& vertexName.startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX);

		// vertices outside the iteration have no co-location group by default
		if (!isLoopSource && !isLoopSink) {
			assertNull(vertex.getCoLocationGroup());
			continue;
		}

		// iteration vertices must be members of their own co-location group
		final CoLocationGroup group = vertex.getCoLocationGroup();
		if (isLoopSource) {
			sourceCoLocation = group;
		} else {
			sinkCoLocation = group;
		}
		assertTrue(group.getVertices().contains(vertex));
	}

	// both iteration ends were seen and they share a single co-location group
	assertNotNull(sourceCoLocation);
	assertNotNull(sinkCoLocation);
	assertEquals(sourceCoLocation, sinkCoLocation);
}
 
Example 12
Source File: IterateITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that a co-iteration cannot be closed with a DataStream that does not have that
 * iteration among its predecessors; an {@link UnsupportedOperationException} is expected.
 */
@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// dummy mapper so that the input ends up with the correct parallelism
	final DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	// two independent iterations opened on the same input
	final IterativeStream<Integer> unrelatedLoop = input.iterate();
	final ConnectedIterativeStreams<Integer, Integer> coLoop =
			input.iterate().withFeedbackType(Integer.class);

	// the feedback stems from the other loop, not from coLoop
	final DataStream<Integer> foreignFeedback = unrelatedLoop.map(noOpIntMap);
	coLoop.closeWith(foreignFeedback);
}
 
Example 13
Source File: IterateITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an iteration cannot be closed with a DataStream that does not have that
 * iteration among its predecessors; an {@link UnsupportedOperationException} is expected.
 */
@Test(expected = UnsupportedOperationException.class)
public void testClosingFromOutOfLoop() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// dummy mapper so that the input ends up with the correct parallelism
	final DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	// two independent iterations opened on the same input
	final IterativeStream<Integer> firstLoop = input.iterate();
	final IterativeStream<Integer> secondLoop = input.iterate();

	// feedback derived from the first loop is used to close the second one
	final DataStream<Integer> foreignFeedback = firstLoop.map(noOpIntMap);
	secondLoop.closeWith(foreignFeedback);
}
 
Example 14
Source File: IterateITCase.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Executes a job whose iteration is opened and consumed but never closed with a feedback
 * stream; an {@link IllegalStateException} is expected.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	final IterativeStream<Integer> openLoop = environment
			.fromElements(1, 10)
			.map(noOpIntMap)
			.iterate();

	// the loop gets a consumer, but closeWith is never called on it
	openLoop.map(noOpIntMap).print();

	environment.execute();
}
 
Example 15
Source File: IterateITCase.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Closes an iteration with a feedback stream whose parallelism is set to
 * {@code parallelism / 2}; an {@link UnsupportedOperationException} is expected.
 */
@Test(expected = UnsupportedOperationException.class)
public void testDifferingParallelism() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// dummy mapper so that the input ends up with the correct parallelism
	final DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	final IterativeStream<Integer> loop = input.iterate();

	// the feedback stream runs with half the parallelism
	final DataStream<Integer> feedback = loop.map(noOpIntMap).setParallelism(parallelism / 2);
	loop.closeWith(feedback);
}
 
Example 16
Source File: IterateITCase.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Closes the same iteration twice, each time with a freshly mapped feedback stream. No
 * exception is declared as expected, so the test passes only if both calls succeed.
 */
@Test
public void testDoubleClosing() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// dummy mapper so that the input ends up with the correct parallelism
	final DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	final IterativeStream<Integer> loop = input.iterate();

	for (int closing = 0; closing < 2; closing++) {
		loop.closeWith(loop.map(noOpIntMap));
	}
}
 
Example 17
Source File: IterateITCase.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Executes a job whose iteration is opened and consumed but never closed with a feedback
 * stream; an {@link IllegalStateException} is expected.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	final IterativeStream<Integer> openLoop = environment
			.fromElements(1, 10)
			.map(noOpIntMap)
			.iterate();

	// the loop gets a consumer, but closeWith is never called on it
	openLoop.map(noOpIntMap).print();

	environment.execute();
}
 
Example 18
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 3 votes vote down vote up
/**
 * Executes a job whose iteration is opened and consumed but never closed with a feedback
 * stream; an {@link IllegalStateException} is expected.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	final IterativeStream<Integer> openLoop = environment
			.fromElements(1, 10)
			.map(noOpIntMap)
			.iterate();

	// the loop gets a consumer, but closeWith is never called on it
	openLoop.map(noOpIntMap).print();

	environment.execute();
}
 
Example 19
Source File: IterateITCase.java    From flink with Apache License 2.0 3 votes vote down vote up
/**
 * Enables checkpointing on the given environment and adds an iterative pipeline to it: a
 * collection of {@code parallelism * 2} {@code false} values is mapped, iterated through an
 * {@code IterationHead} flat map, and the closed loop is connected to a sink.
 *
 * @param env the environment the pipeline is added to
 * @param timeoutScale factor multiplied with 3000 to form the argument of {@code iterate}
 */
private void createIteration(StreamExecutionEnvironment env, int timeoutScale) {
	env.enableCheckpointing();

	final DataStream<Boolean> input = env
			.fromCollection(Collections.nCopies(parallelism * 2, false))
			.map(noOpBoolMap)
			.name("ParallelizeMap");

	final IterativeStream<Boolean> loop = input.iterate(3000 * timeoutScale);

	// the flat-mapped loop output is the feedback that closes the iteration
	final DataStream<Boolean> feedback = loop.flatMap(new IterationHead());
	loop.closeWith(feedback).addSink(new ReceiveCheckNoOpSink<Boolean>());
}
 
Example 20
Source File: IterateITCase.java    From Flink-CEPplus with Apache License 2.0 3 votes vote down vote up
/**
 * Closes an iteration with a feedback stream whose parallelism is set to
 * {@code parallelism / 2}; an {@link UnsupportedOperationException} is expected.
 */
@Test(expected = UnsupportedOperationException.class)
public void testDifferingParallelism() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// dummy mapper so that the input ends up with the correct parallelism
	final DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	final IterativeStream<Integer> loop = input.iterate();

	// the feedback stream runs with half the parallelism
	final DataStream<Integer> feedback = loop.map(noOpIntMap).setParallelism(parallelism / 2);
	loop.closeWith(feedback);
}