Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#disableOperatorChaining()
The following examples show how to use
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#disableOperatorChaining().
The source file, originating project, and license are noted above each example.
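Before diving into the examples, it may help to see the method in context. By default, Flink chains operators with compatible parallelism (for example, a source followed by a map) into a single task, avoiding thread hand-over and serialization between them. Calling disableOperatorChaining() on the environment turns chaining off for the whole job, so every operator runs as its own task. This is mostly useful for debugging and profiling, since each operator then appears separately in the WebUI and in per-task metrics; several of the tests below disable chaining precisely so that each operator becomes its own JobGraph vertex. The following is a minimal, hypothetical sketch illustrating the call; the class name and the toy source/map/sink pipeline are stand-ins, not taken from any of the projects listed here.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.DiscardingSink;

// Hypothetical example class, for illustration only.
public class DisableChainingExample {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // With parallelism 1 and chaining enabled (the default), the pipeline below
        // would run as a single chained task. Disabling chaining makes each operator
        // run as its own task (and JobGraph vertex), at the cost of serialization
        // between tasks.
        env.disableOperatorChaining();

        DataStream<String> lines = env.fromElements("a", "b", "c");

        lines
                .map((MapFunction<String, String>) String::toUpperCase)
                .addSink(new DiscardingSink<>());

        env.execute("disableOperatorChaining example");
    }
}

Note that chaining can also be controlled per operator: startNewChain() begins a new chain at a given operator (as in Examples 1 and 2 below), and disableChaining() opts a single operator out without affecting the rest of the job.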
Example 1
Source File: RollingSinkFaultToleranceITCase.java From Flink-CEPplus with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
    assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

    env.enableCheckpointing(20);
    env.setParallelism(12);
    env.disableOperatorChaining();

    DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

    DataStream<String> mapped = stream
            .map(new OnceFailingIdentityMapper(NUM_STRINGS));

    RollingSink<String> sink = new RollingSink<String>(outPath)
            .setBucketer(new NonRollingBucketer())
            .setBatchSize(10000)
            .setValidLengthPrefix("")
            .setPendingPrefix("")
            .setPendingSuffix(PENDING_SUFFIX)
            .setInProgressSuffix(IN_PROGRESS_SUFFIX);

    mapped.addSink(sink);
}
Example 2
Source File: BucketingSinkFaultToleranceITCase.java From flink with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
    assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

    env.enableCheckpointing(20);
    env.setParallelism(12);
    env.disableOperatorChaining();

    DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

    DataStream<String> mapped = stream
            .map(new OnceFailingIdentityMapper(NUM_STRINGS));

    BucketingSink<String> sink = new BucketingSink<String>(outPath)
            .setBucketer(new BasePathBucketer<String>())
            .setBatchSize(10000)
            .setValidLengthPrefix("")
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix(PENDING_SUFFIX)
            .setInProgressSuffix(IN_PROGRESS_SUFFIX);

    mapped.addSink(sink);
}
Example 3
Source File: ClickEventCount.java From flink with Apache License 2.0
private static void configureEnvironment(
        final ParameterTool params,
        final StreamExecutionEnvironment env) {

    boolean checkpointingEnabled = params.has(CHECKPOINTING_OPTION);
    boolean eventTimeSemantics = params.has(EVENT_TIME_OPTION);

    if (checkpointingEnabled) {
        env.enableCheckpointing(1000);
    }

    if (eventTimeSemantics) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    }

    // disabling operator chaining to make it easier to follow the job in the WebUI
    env.disableOperatorChaining();
}
Example 4
Source File: StreamingJobGraphGeneratorNodeHashTest.java From flink with Apache License 2.0
/**
 * Tests that there are no collisions with two identical intermediate nodes connected to the
 * same predecessor.
 *
 * <pre>
 *             /-> [ (map) ] -> [ (sink) ]
 * [ (src) ] -+
 *             \-> [ (map) ] -> [ (sink) ]
 * </pre>
 */
@Test
public void testNodeHashIdenticalNodes() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    DataStream<String> src = env.addSource(new NoOpSourceFunction());

    src.map(new NoOpMapFunction()).addSink(new DiscardingSink<>());
    src.map(new NoOpMapFunction()).addSink(new DiscardingSink<>());

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    Set<JobVertexID> vertexIds = new HashSet<>();
    for (JobVertex vertex : jobGraph.getVertices()) {
        assertTrue(vertexIds.add(vertex.getID()));
    }
}
Example 5
Source File: SavepointITCase.java From flink with Apache License 2.0
/**
 * Creates a streaming JobGraph from the StreamEnvironment.
 */
private JobGraph createJobGraph(
        int parallelism,
        int numberOfRetries,
        long restartDelay) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.disableOperatorChaining();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay));

    DataStream<Integer> stream = env
            .addSource(new InfiniteTestSource())
            .shuffle()
            .map(new StatefulCounter());

    stream.addSink(new DiscardingSink<>());

    return env.getStreamGraph().getJobGraph();
}
Example 6
Source File: SavepointITCase.java From flink with Apache License 2.0
/**
 * Creates a streaming JobGraph from the StreamEnvironment.
 */
private JobGraph createJobGraph(
        int parallelism,
        int numberOfRetries,
        long restartDelay) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.disableOperatorChaining();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay));
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> stream = env
            .addSource(new InfiniteTestSource())
            .shuffle()
            .map(new StatefulCounter());

    stream.addSink(new DiscardingSink<>());

    return env.getStreamGraph().getJobGraph();
}
Example 7
Source File: StreamingJobGraphGeneratorNodeHashTest.java From flink with Apache License 2.0
/**
 * Tests that there are no collisions with two identical sources.
 *
 * <pre>
 * [ (src0) ] --\
 *              +--> [ (sink) ]
 * [ (src1) ] --/
 * </pre>
 */
@Test
public void testNodeHashIdenticalSources() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    DataStream<String> src0 = env.addSource(new NoOpSourceFunction());
    DataStream<String> src1 = env.addSource(new NoOpSourceFunction());

    src0.union(src1).addSink(new DiscardingSink<>());

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertTrue(vertices.get(0).isInputVertex());
    assertTrue(vertices.get(1).isInputVertex());

    assertNotNull(vertices.get(0).getID());
    assertNotNull(vertices.get(1).getID());

    assertNotEquals(vertices.get(0).getID(), vertices.get(1).getID());
}
Example 8
Source File: ClickEventCount.java From flink-playgrounds with Apache License 2.0
private static void configureEnvironment(
        final ParameterTool params,
        final StreamExecutionEnvironment env) {

    boolean checkpointingEnabled = params.has(CHECKPOINTING_OPTION);
    boolean eventTimeSemantics = params.has(EVENT_TIME_OPTION);

    if (checkpointingEnabled) {
        env.enableCheckpointing(1000);
    }

    if (eventTimeSemantics) {
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    }

    // disabling operator chaining to make it easier to follow the job in the WebUI
    env.disableOperatorChaining();
}
Example 9
Source File: StreamingJobGraphGeneratorNodeHashTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests that there are no collisions with two identical intermediate nodes connected to the
 * same predecessor.
 *
 * <pre>
 *             /-> [ (map) ] -> [ (sink) ]
 * [ (src) ] -+
 *             \-> [ (map) ] -> [ (sink) ]
 * </pre>
 */
@Test
public void testNodeHashIdenticalNodes() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    DataStream<String> src = env.addSource(new NoOpSourceFunction());

    src.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());
    src.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    Set<JobVertexID> vertexIds = new HashSet<>();
    for (JobVertex vertex : jobGraph.getVertices()) {
        assertTrue(vertexIds.add(vertex.getID()));
    }
}
Example 10
Source File: StreamingJobGraphGeneratorNodeHashTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests that there are no collisions with two identical sources.
 *
 * <pre>
 * [ (src0) ] --\
 *              +--> [ (sink) ]
 * [ (src1) ] --/
 * </pre>
 */
@Test
public void testNodeHashIdenticalSources() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    DataStream<String> src0 = env.addSource(new NoOpSourceFunction());
    DataStream<String> src1 = env.addSource(new NoOpSourceFunction());

    src0.union(src1).addSink(new NoOpSinkFunction());

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    assertTrue(vertices.get(0).isInputVertex());
    assertTrue(vertices.get(1).isInputVertex());

    assertNotNull(vertices.get(0).getID());
    assertNotNull(vertices.get(1).getID());

    assertNotEquals(vertices.get(0).getID(), vertices.get(1).getID());
}
Example 11
Source File: SavepointITCase.java From Flink-CEPplus with Apache License 2.0
/**
 * Creates a streaming JobGraph from the StreamEnvironment.
 */
private JobGraph createJobGraph(
        int parallelism,
        int numberOfRetries,
        long restartDelay) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.disableOperatorChaining();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay));
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> stream = env
            .addSource(new InfiniteTestSource())
            .shuffle()
            .map(new StatefulCounter());

    stream.addSink(new DiscardingSink<>());

    return env.getStreamGraph().getJobGraph();
}
Example 12
Source File: BucketingSinkFaultToleranceITCase.java From Flink-CEPplus with Apache License 2.0
@Override
public void testProgram(StreamExecutionEnvironment env) {
    assertTrue("Broken test setup", NUM_STRINGS % 40 == 0);

    env.enableCheckpointing(20);
    env.setParallelism(12);
    env.disableOperatorChaining();

    DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)).startNewChain();

    DataStream<String> mapped = stream
            .map(new OnceFailingIdentityMapper(NUM_STRINGS));

    BucketingSink<String> sink = new BucketingSink<String>(outPath)
            .setBucketer(new BasePathBucketer<String>())
            .setBatchSize(10000)
            .setValidLengthPrefix("")
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix(PENDING_SUFFIX)
            .setInProgressSuffix(IN_PROGRESS_SUFFIX);

    mapped.addSink(sink);
}
Example 13
Source File: NotifyCheckpointAbortedITCase.java From flink with Apache License 2.0
/**
 * Verifies that operators are notified when a checkpoint is aborted.
 *
 * <p>The job runs with at least two checkpoints. The 1st checkpoint fails while being added to the
 * checkpoint store, and the 2nd checkpoint is declined in the async checkpoint phase of 'DeclineSink'.
 *
 * <p>The job graph looks like:
 * NormalSource --> keyBy --> NormalMap --> DeclineSink
 */
@Test(timeout = TEST_TIMEOUT)
public void testNotifyCheckpointAborted() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
    env.getCheckpointConfig().enableUnalignedCheckpoints(unalignedCheckpointEnabled);
    env.getCheckpointConfig().setTolerableCheckpointFailureNumber(1);
    env.disableOperatorChaining();
    env.setParallelism(1);

    final StateBackend failingStateBackend = new DeclineSinkFailingStateBackend(checkpointPath);
    env.setStateBackend(failingStateBackend);

    env.addSource(new NormalSource()).name("NormalSource")
            .keyBy((KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0)
            .transform("NormalMap", TypeInformation.of(Integer.class), new NormalMap())
            .transform(DECLINE_SINK_NAME, TypeInformation.of(Object.class), new DeclineSink());

    final ClusterClient<?> clusterClient = cluster.getClusterClient();
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    JobID jobID = jobGraph.getJobID();

    ClientUtils.submitJob(clusterClient, jobGraph);

    TestingCompletedCheckpointStore.addCheckpointLatch.await();
    TestingCompletedCheckpointStore.abortCheckpointLatch.trigger();

    verifyAllOperatorsNotifyAborted();
    resetAllOperatorsNotifyAbortedLatches();
    verifyAllOperatorsNotifyAbortedTimes(1);

    DeclineSink.waitLatch.trigger();
    verifyAllOperatorsNotifyAborted();
    verifyAllOperatorsNotifyAbortedTimes(2);

    clusterClient.cancel(jobID).get();
}
Example 14
Source File: StreamingJobGraphGeneratorNodeHashTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Tests that a collision on the manual hash throws an Exception.
 */
@Test(expected = IllegalArgumentException.class)
public void testManualHashAssignmentCollisionThrowsException() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    env.addSource(new NoOpSourceFunction()).uid("source")
            .map(new NoOpMapFunction()).uid("source") // Collision
            .addSink(new NoOpSinkFunction());

    // This call is necessary to generate the job graph
    env.getStreamGraph().getJobGraph();
}
Example 15
Source File: StreamingJobGraphGeneratorNodeHashTest.java From flink with Apache License 2.0
/**
 * Tests that a collision on the manual hash throws an Exception.
 */
@Test(expected = IllegalArgumentException.class)
public void testManualHashAssignmentCollisionThrowsException() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    env.addSource(new NoOpSourceFunction()).uid("source")
            .map(new NoOpMapFunction()).uid("source") // Collision
            .addSink(new DiscardingSink<>());

    // This call is necessary to generate the job graph
    env.getStreamGraph().getJobGraph();
}
Example 16
Source File: RegionFailoverITCase.java From flink with Apache License 2.0
private JobGraph createJobGraph() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(NUM_OF_REGIONS);
    env.setMaxParallelism(MAX_PARALLELISM);
    env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
    env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    env.disableOperatorChaining();

    // Use DataStreamUtils#reinterpretAsKeyedStream to avoid merging regions, so that this stream
    // graph consists of 'NUM_OF_REGIONS' individual regions.
    DataStreamUtils.reinterpretAsKeyedStream(
            env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
                    .name(MULTI_REGION_SOURCE_NAME)
                    .setParallelism(NUM_OF_REGIONS),
            (KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
            TypeInformation.of(Integer.class))
            .map(new FailingMapperFunction(NUM_OF_RESTARTS))
            .setParallelism(NUM_OF_REGIONS)
            .addSink(new ValidatingSink())
            .setParallelism(NUM_OF_REGIONS);

    // Another stream graph, completely disconnected from the one above.
    env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
            .name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
            .map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

    return env.getStreamGraph().getJobGraph();
}
Example 17
Source File: PrometheusExampleJob.java From flink-prometheus-example with Apache License 2.0
private void run() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.disableOperatorChaining();

    env.addSource(new RandomSourceFunction(parameters.getInt("elements", Integer.MAX_VALUE)))
            .name(RandomSourceFunction.class.getSimpleName())
            .map(new FlinkMetricsExposingMapFunction())
            .name(FlinkMetricsExposingMapFunction.class.getSimpleName())
            .addSink(new DiscardingSink<>())
            .name(DiscardingSink.class.getSimpleName());

    env.execute(PrometheusExampleJob.class.getSimpleName());
}
Example 18
Source File: CheckpointedStreamingProgram.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(CHECKPOINT_INTERVALL);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 100L));
    env.disableOperatorChaining();

    DataStream<String> text = env.addSource(new SimpleStringGenerator());
    text.map(new StatefulMapper()).addSink(new DiscardingSink<>());

    env.setParallelism(1);

    env.execute("Checkpointed Streaming Program");
}
Example 19
Source File: StreamingETL.java From flink-streaming-etl with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse arguments
    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

    // create streaming environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // enable event time processing
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // enable fault-tolerance
    env.enableCheckpointing(1000);

    // enable restarts
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

    env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

    // run each operator separately
    env.disableOperatorChaining();

    // get data from Kafka
    Properties kParams = params.getProperties();
    kParams.setProperty("group.id", UUID.randomUUID().toString());
    DataStream<ObjectNode> inputStream = env
            .addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
            .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
                @Override
                public long extractTimestamp(ObjectNode jsonNodes) {
                    return jsonNodes.get("timestamp_ms").asLong();
                }
            }).name("Timestamp extractor");

    // filter out records without lang field
    DataStream<ObjectNode> tweetsWithLang = inputStream
            .filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang"))
            .name("Filter records without 'lang' field");

    // select only lang = "en" tweets
    DataStream<ObjectNode> englishTweets = tweetsWithLang
            .filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en"))
            .name("Select 'lang'=en tweets");

    // write to file system
    RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
    rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
    englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

    // build aggregates (count per language) using window (10 seconds tumbling):
    DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang
            .keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
            .timeWindow(Time.seconds(10))
            .apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter())
            .name("Count per Language (10 seconds tumbling)");

    // write window aggregate to ElasticSearch
    List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
    ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
    languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

    // word-count on the tweet stream
    DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
            // get text from tweets
            .map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
            // split text into (word, 1) tuples
            .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                @Override
                public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
                    String[] splits = s.split(" ");
                    for (String sp : splits) {
                        collector.collect(new Tuple2<>(sp, 1L));
                    }
                }
            }).name("Tokenize words")
            // group by word
            .keyBy(0)
            // build 1 min windows, compute every 10 seconds --> count word frequency
            .timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
            // build top n every 10 seconds
            .timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

    // write top Ns to Kafka topic
    topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");

    env.execute("Streaming ETL");
}