org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor Java Examples

The following examples show how to use org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor. Each example is drawn from an open-source project; the source file and license are noted above each snippet.
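BoundedOutOfOrdernessTimestampExtractor assigns an event-time timestamp to each element and emits watermarks that trail the largest timestamp seen so far by a fixed bound, so it suits streams whose elements arrive at most a bounded amount of time out of order. Several tests below use a LongExtractor whose definition is not shown; a minimal sketch of such a subclass, assuming each Long element serves as its own timestamp, might look like this:

import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

// Assumed helper mirroring the LongExtractor referenced by the tests below:
// each Long element doubles as its own event timestamp.
public class LongExtractor extends BoundedOutOfOrdernessTimestampExtractor<Long> {

	private static final long serialVersionUID = 1L;

	public LongExtractor(Time maxOutOfOrderness) {
		super(maxOutOfOrderness);
	}

	@Override
	public long extractTimestamp(Long element) {
		return element;
	}
}

It would typically be attached to a stream with stream.assignTimestampsAndWatermarks(new LongExtractor(Time.milliseconds(10L))).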
Example #1
Source File: BoundedOutOfOrdernessTimestampExtractorTest.java    From flink with Apache License 2.0
private void runValidTests(BoundedOutOfOrdernessTimestampExtractor<Long> extractor) {
	assertEquals(new Watermark(Long.MIN_VALUE), extractor.getCurrentWatermark());

	assertEquals(13L, extractor.extractTimestamp(13L, 0L));
	assertEquals(13L, extractor.extractTimestamp(13L, 0L));
	assertEquals(14L, extractor.extractTimestamp(14L, 0L));
	assertEquals(20L, extractor.extractTimestamp(20L, 0L));

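	// with the 10 ms bound used by these tests, the watermark trails the max timestamp seen: 20 - 10 = 10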
	assertEquals(new Watermark(10L), extractor.getCurrentWatermark());

	assertEquals(20L, extractor.extractTimestamp(20L, 0L));
	assertEquals(20L, extractor.extractTimestamp(20L, 0L));
	assertEquals(500L, extractor.extractTimestamp(500L, 0L));

	assertEquals(new Watermark(490L), extractor.getCurrentWatermark());

	assertEquals(Long.MAX_VALUE - 1, extractor.extractTimestamp(Long.MAX_VALUE - 1, 0L));
	assertEquals(new Watermark(Long.MAX_VALUE - 11), extractor.getCurrentWatermark());
}
 
Example #2
Source File: WordCountIntegrationTest.java    From tutorials with MIT License
@Test
public void givenStreamOfEvents_whenProcessEvents_thenShouldApplyWindowingOnTransformation() throws Exception {
    // given
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SingleOutputStreamOperator<Tuple2<Integer, Long>> windowed = env
            .fromElements(
                    new Tuple2<>(16, ZonedDateTime.now().plusMinutes(25).toInstant().getEpochSecond()),
                    new Tuple2<>(15, ZonedDateTime.now().plusMinutes(2).toInstant().getEpochSecond()))
            .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Tuple2<Integer, Long>>(Time.seconds(20)) {
                @Override
                public long extractTimestamp(Tuple2<Integer, Long> element) {
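                    // f1 holds epoch seconds; Flink timestamps are expressed in milliseconds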
                    return element.f1 * 1000;
                }
            });

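    // tumbling 5-second event-time windows over the whole stream; maxBy keeps the element with the largest f0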
    SingleOutputStreamOperator<Tuple2<Integer, Long>> reduced = windowed.windowAll(TumblingEventTimeWindows.of(Time.seconds(5))).maxBy(0, true);

    reduced.print();

    // when
    env.execute();
}
 
Example #3
Source File: BoundedOutOfOrdernessTimestampExtractorTest.java    From flink with Apache License 2.0
@Test
public void testInitialFinalAndWatermarkUnderflow() {
	BoundedOutOfOrdernessTimestampExtractor<Long> extractor = new LongExtractor(Time.milliseconds(10L));
	assertEquals(Long.MIN_VALUE, extractor.getCurrentWatermark().getTimestamp());

	extractor.extractTimestamp(Long.MIN_VALUE, -1L);

	// The following lines check for underflow:
	// the extractor is configured with a max out-of-orderness of 10 millis.
	// We insert an element with ts of Long.MIN_VALUE + 2, which becomes the max ts;
	// the next watermark would then be Long.MIN_VALUE + 2 - 10, which would
	// underflow, so the extractor must clamp the watermark to Long.MIN_VALUE.

	extractor.extractTimestamp(Long.MIN_VALUE + 2, -1);
	assertEquals(Long.MIN_VALUE, extractor.getCurrentWatermark().getTimestamp());

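	// away from the boundary, the watermark simply trails the max timestamp by the 10 ms bound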
	extractor.extractTimestamp(Long.MAX_VALUE, -1L);
	assertEquals(Long.MAX_VALUE - 10, extractor.getCurrentWatermark().getTimestamp());
}
 
Example #4
Source File: KafkaItemTransactionJob.java    From flink-tutorials with Apache License 2.0
public DataStream<ItemTransaction> readTransactionStream(ParameterTool params, StreamExecutionEnvironment env) {
	// We read the ItemTransaction objects directly using the schema
	FlinkKafkaConsumer<ItemTransaction> transactionSource = new FlinkKafkaConsumer<>(
			params.getRequired(TRANSACTION_INPUT_TOPIC_KEY), new TransactionSchema(),
			Utils.readKafkaProperties(params, true));

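	// commit offsets to Kafka when checkpoints complete, and start reading from the earliest offset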
	transactionSource.setCommitOffsetsOnCheckpoints(true);
	transactionSource.setStartFromEarliest();

	// In case event time processing is enabled we assign trailing watermarks for each partition
	transactionSource.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ItemTransaction>(Time.minutes(1)) {
		@Override
		public long extractTimestamp(ItemTransaction transaction) {
			return transaction.ts;
		}
	});

	return env.addSource(transactionSource)
			.name("Kafka Transaction Source")
			.uid("Kafka Transaction Source");
}
 
Example #5
Source File: BoundedAssigner.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // use event time as the time characteristic of the job
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        List<Tuple2<String, Long>> collectionInput = new ArrayList<>();
        Tuple2<String, Long> a = new Tuple2<>("first event", 1L);
        Tuple2<String, Long> b = new Tuple2<>("second event", 2L);
        collectionInput.add(a);
        collectionInput.add(b);

        // assign timestamps and watermarks with a bounded out-of-orderness of 10 seconds
        // (the maximum expected delay); capture the returned stream, which carries the timestamps
        DataStream<Tuple2<String, Long>> text = env.fromCollection(collectionInput);
        text = text.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Tuple2<String, Long>>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(Tuple2<String, Long> element) {
                return element.f1;
            }
        });

        env.execute();
    }
 
Example #6
Source File: DataStreamAllroundTestJobFactory.java    From flink with Apache License 2.0
static BoundedOutOfOrdernessTimestampExtractor<Event> createTimestampExtractor(ParameterTool pt) {
	return new BoundedOutOfOrdernessTimestampExtractor<Event>(
		Time.milliseconds(
			pt.getLong(
				SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.key(),
				SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.defaultValue()))) {

		private static final long serialVersionUID = -3154419724891779938L;

		@Override
		public long extractTimestamp(Event element) {
			return element.getEventTime();
		}
	};
}
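A factory like this is applied to a source stream; a minimal usage sketch, assuming a DataStream<Event> named events and the same ParameterTool pt:

	DataStream<Event> withTimestamps = events.assignTimestampsAndWatermarks(createTimestampExtractor(pt));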
 
Example #7
Source File: BoundedOutOfOrdernessTimestampExtractorTest.java    From flink with Apache License 2.0
@Test
public void testInitializationAndRuntime() {
	Time maxAllowedLateness = Time.milliseconds(10L);
	BoundedOutOfOrdernessTimestampExtractor<Long> extractor =
		new LongExtractor(maxAllowedLateness);

	assertEquals(maxAllowedLateness.toMilliseconds(),
		extractor.getMaxOutOfOrdernessInMillis());

	runValidTests(extractor);
}
 
Example #8
Source File: Main2.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // set parallelism to 1
        env.setParallelism(1);
//        env.setParallelism(4);

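        // each input line is expected as: <word>,<count>,<timestamp in milliseconds>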
        SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
                .map(new MapFunction<String, Word>() {
                    @Override
                    public Word map(String value) throws Exception {
                        String[] split = value.split(",");
                        return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
                    }
                });

        // assign timestamps and watermarks via BoundedOutOfOrdernessTimestampExtractor with a
        // 10-second bound; capture the returned stream so the assigned timestamps are actually used
        SingleOutputStreamOperator<Word> withTimestamps = data.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Word>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(Word element) {
                return element.getTimestamp();
            }
        });

        withTimestamps.print();
        env.execute("watermark demo");
    }
 
Example #9
Source File: ClickEventCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool params = ParameterTool.fromArgs(args);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	configureEnvironment(params, env);

	String inputTopic = params.get("input-topic", "input");
	String outputTopic = params.get("output-topic", "output");
	String brokers = params.get("bootstrap.servers", "localhost:9092");
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
	kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

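	// pipeline: Kafka source -> event-time tumbling windows keyed by page -> aggregated counts -> Kafka sink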
	env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
		.name("ClickEvent Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
			@Override
			public long extractTimestamp(final ClickEvent element) {
				return element.getTimestamp().getTime();
			}
		})
		.keyBy(ClickEvent::getPage)
		.timeWindow(WINDOW_SIZE)
		.aggregate(new CountingAggregator(),
			new ClickEventStatisticsCollector())
		.name("ClickEvent Counter")
		.addSink(new FlinkKafkaProducer<>(
			outputTopic,
			new ClickEventStatisticsSerializationSchema(outputTopic),
			kafkaProps,
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
		.name("ClickEventStatistics Sink");

	env.execute("Click Event Count");
}
 
Example #10
Source File: ClickEventCount.java    From flink-playgrounds with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool params = ParameterTool.fromArgs(args);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	configureEnvironment(params, env);

	boolean inflictBackpressure = params.has(BACKPRESSURE_OPTION);

	String inputTopic = params.get("input-topic", "input");
	String outputTopic = params.get("output-topic", "output");
	String brokers = params.get("bootstrap.servers", "localhost:9092");
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
	kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

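	// read ClickEvents from Kafka and assign event timestamps, tolerating up to 200 ms of out-of-orderness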
	DataStream<ClickEvent> clicks =
			env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
		.name("ClickEvent Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
			@Override
			public long extractTimestamp(final ClickEvent element) {
				return element.getTimestamp().getTime();
			}
		});

	if (inflictBackpressure) {
		// Force a network shuffle so that the backpressure will affect the buffer pools
		clicks = clicks
			.keyBy(ClickEvent::getPage)
			.map(new BackpressureMap())
			.name("Backpressure");
	}

	DataStream<ClickEventStatistics> statistics = clicks
		.keyBy(ClickEvent::getPage)
		.timeWindow(WINDOW_SIZE)
		.aggregate(new CountingAggregator(),
			new ClickEventStatisticsCollector())
		.name("ClickEvent Counter");

	statistics
		.addSink(new FlinkKafkaProducer<>(
			outputTopic,
			new ClickEventStatisticsSerializationSchema(outputTopic),
			kafkaProps,
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
		.name("ClickEventStatistics Sink");

	env.execute("Click Event Count");
}
 
Example #11
Source File: TurbineHeatProcessor.java    From pravega-samples with Apache License 2.0
public static void main(String[] args) throws Exception {

        ParameterTool params = ParameterTool.fromArgs(args);
        PravegaConfig pravegaConfig = PravegaConfig
                .fromParams(params)
                .withDefaultScope("examples");

        // ensure that the scope and stream exist
        Stream stream = Utils.createStream(
                pravegaConfig,
                params.get("input", "turbineHeatTest"),
                StreamConfiguration.builder().scalingPolicy(ScalingPolicy.fixed(1)).build());

        // set up the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1); // required: on a multi-core machine, idle sources keep the watermark from advancing, so windows would never trigger

        // 1. read and decode the sensor events from a Pravega stream
        FlinkPravegaReader<String> source = FlinkPravegaReader.<String>builder()
                .withPravegaConfig(pravegaConfig)
                .forStream(stream)
                .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class))
                .build();
        DataStream<SensorEvent> events = env.addSource(source, "input").map(new SensorMapper()).name("events");

        // 2. extract timestamp information to support 'event-time' processing
        SingleOutputStreamOperator<SensorEvent> timestamped = events.assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<SensorEvent>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(SensorEvent element) {
                return element.getTimestamp();
            }
        });

        // 3. summarize the temperature data for each sensor
        SingleOutputStreamOperator<SensorAggregate> summaries = timestamped
                .keyBy("sensorId")
                .window(TumblingEventTimeWindows.of(Time.days(1), Time.hours(8)))
                .fold(null, new SensorAggregator()).name("summaries");

        // 4. save to HDFS and print to stdout.  Refer to the TaskManager's 'Stdout' view in the Flink UI.
        summaries.print().name("stdout");
        if (params.has("output")) {
            summaries.writeAsCsv(params.getRequired("output"), FileSystem.WriteMode.OVERWRITE);
        }

        env.execute("TurbineHeatProcessor_" + stream);
    }
 
Example #12
Source File: StreamingETL.java    From flink-streaming-etl with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse arguments
	ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

	// create streaming environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// enable event time processing
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	// enable fault-tolerance
	env.enableCheckpointing(1000);

	// enable restarts
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

	env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

	// run each operator separately
	env.disableOperatorChaining();

	// get data from Kafka
	Properties kParams = params.getProperties();
	kParams.setProperty("group.id", UUID.randomUUID().toString());
	DataStream<ObjectNode> inputStream = env.addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
			@Override
			public long extractTimestamp(ObjectNode jsonNodes) {
				return jsonNodes.get("timestamp_ms").asLong();
			}
		}).name("Timestamp extractor");

	// filter out records without lang field
	DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");

	// select only lang = "en" tweets
	DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");

	// write to file system
	RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
	rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
	englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

	// build aggregates (count per language) using window (10 seconds tumbling):
	DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang.keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
		.timeWindow(Time.seconds(10))
		.apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Language (10 seconds tumbling)");

	// write window aggregate to ElasticSearch
	List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
	ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());

	languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

	// word-count on the tweet stream
	DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
		// get text from tweets
		.map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
		// split text into (word, 1) tuples
		.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
			@Override
			public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
				String[] splits = s.split(" ");
				for (String sp : splits) {
					collector.collect(new Tuple2<>(sp, 1L));
				}
			}
		}).name("Tokenize words")
		// group by word
		.keyBy(0)
		// build 1 min windows, compute every 10 seconds --> count word frequency
		.timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
		// build top n every 10 seconds
		.timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

	// write top Ns to Kafka topic
	topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");

	env.execute("Streaming ETL");

}