Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#assignTimestampsAndWatermarks()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#assignTimestampsAndWatermarks(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: DataStreamTest.java    From flink with Apache License 2.0 7 votes vote down vote up
/**
 * Ensure that WatermarkStrategy is easy to use in the API, without superfluous generics.
 *
 * <p>The test has no assertions: it passes as long as both calls below compile and can be
 * applied to the stream.
 */
@Test
public void testErgonomicWatermarkStrategy() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> input = env.fromElements("bonjour");

	// a single factory call with no method chain is written without an explicit generic type
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.forBoundedOutOfOrderness(Duration.ofMillis(10)));

	// as soon as you have a chain of methods the first call needs to specify the generic type
	input.assignTimestampsAndWatermarks(
			WatermarkStrategy
					.<String>forBoundedOutOfOrderness(Duration.ofMillis(10))
					.withTimestampAssigner((event, timestamp) -> 42L));
}
 
Example 2
Source File: AscendingAssigner.java    From flink-simple-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-element stream of (name, timestamp) tuples, attaches an
 * {@code AscendingTimestampExtractor} that reads the timestamp from field {@code f1},
 * and executes the job.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Use event time as the time characteristic of the job.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Sample events; the second tuple field carries the timestamp.
        List<Tuple2<String, Long>> sampleEvents = new ArrayList<>();
        sampleEvents.add(new Tuple2<>("first event", 1L));
        sampleEvents.add(new Tuple2<>("second event", 2L));

        DataStream<Tuple2<String, Long>> text = env.fromCollection(sampleEvents);

        // Assign timestamps and watermarks with an ascending-timestamp extractor.
        AscendingTimestampExtractor<Tuple2<String, Long>> timestampExtractor =
                new AscendingTimestampExtractor<Tuple2<String, Long>>() {
                    @Override
                    public long extractAscendingTimestamp(Tuple2<String, Long> element) {
                        return element.f1;
                    }
                };
        text.assignTimestampsAndWatermarks(timestampExtractor);

        env.execute();
    }
 
Example 3
Source File: BoundedAssigner.java    From flink-simple-tutorial with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-element stream of (name, timestamp) tuples, attaches a
 * {@code BoundedOutOfOrdernessTimestampExtractor} constructed with {@code Time.seconds(10)}
 * that reads the timestamp from field {@code f1}, and executes the job.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Use event time as the time characteristic of the job.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Sample events; the second tuple field carries the timestamp.
        List<Tuple2<String, Long>> sampleEvents = new ArrayList<>();
        sampleEvents.add(new Tuple2<>("first event", 1L));
        sampleEvents.add(new Tuple2<>("second event", 2L));

        DataStream<Tuple2<String, Long>> text = env.fromCollection(sampleEvents);

        // Assign timestamps and watermarks with a bounded-out-of-orderness extractor;
        // the 10 s argument is the maximum delay that is tolerated.
        BoundedOutOfOrdernessTimestampExtractor<Tuple2<String, Long>> timestampExtractor =
                new BoundedOutOfOrdernessTimestampExtractor<Tuple2<String, Long>>(Time.seconds(10)) {
                    @Override
                    public long extractTimestamp(Tuple2<String, Long> element) {
                        return element.f1;
                    }
                };
        text.assignTimestampsAndWatermarks(timestampExtractor);

        env.execute();
    }
 
Example 4
Source File: SiddhiCEPITCase.java    From flink-siddhi with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a stream of {@code Event} POJOs through a pass-through Siddhi query, writes the
 * resulting {@code Event}s to a temporary text file and checks that the file has 5 lines.
 */
@Test
public void testUnboundedPojoStreamAndReturnPojo() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input = env.addSource(new RandomEventSource(5));

    // Timestamps are taken from the event's own timestamp accessor.
    AscendingTimestampExtractor<Event> eventTimeExtractor = new AscendingTimestampExtractor<Event>() {
        @Override
        public long extractAscendingTimestamp(Event element) {
            return element.getTimestamp();
        }
    };
    // NOTE(review): the stream returned here is discarded and the original `input` is what
    // gets registered with Siddhi below — confirm that this is intended.
    input.assignTimestampsAndWatermarks(eventTimeExtractor);

    // Register the stream with Siddhi and select every field into the output stream.
    DataStream<Event> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream", Event.class);

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    env.execute();

    assertEquals(5, getLineCount(resultPath));
}
 
Example 5
Source File: SiddhiCEPITCase.java    From bahir-flink with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a stream of {@code Event} POJOs through a pass-through Siddhi query, writes the
 * resulting {@code Event}s to a temporary text file and checks that the file has 5 lines.
 */
@Test
public void testUnboundedPojoStreamAndReturnPojo() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Event> input = env.addSource(new RandomEventSource(5));

    // Timestamps are taken from the event's own timestamp accessor.
    AscendingTimestampExtractor<Event> eventTimeExtractor = new AscendingTimestampExtractor<Event>() {
        @Override
        public long extractAscendingTimestamp(Event element) {
            return element.getTimestamp();
        }
    };
    // NOTE(review): the stream returned here is discarded and the original `input` is what
    // gets registered with Siddhi below — confirm that this is intended.
    input.assignTimestampsAndWatermarks(eventTimeExtractor);

    // Register the stream with Siddhi and select every field into the output stream.
    DataStream<Event> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into  outputStream")
        .returns("outputStream", Event.class);

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);

    env.execute();

    assertEquals(5, getLineCount(resultPath));
}
 
Example 6
Source File: TimestampITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * This tests whether timestamps are properly extracted in the timestamp
 * extractor and whether watermarks are also correctly forwarded from this with the auto watermark
 * interval.
 *
 * <p>The source emits the integers 1..numElements; each integer is used as its own timestamp.
 * After execution the watermarks recorded in {@code CustomOperator.finalWatermarks[0]} are
 * compared against the expected sequence.
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
	final int numElements = 10;

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.getConfig().setAutoWatermarkInterval(10);
	env.setParallelism(1);
	env.getConfig().disableSysoutLogging();

	DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
			int index = 1;
			while (index <= numElements) {
				ctx.collect(index);
				// NOTE(review): latch is declared outside this method; presumably it is
				// released elsewhere so the source advances one element at a time — confirm.
				latch.await();
				index++;
			}
		}

		@Override
		public void cancel() {}
	});

	// each element serves as its own timestamp
	DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(
			new AscendingTimestampExtractor<Integer>() {
				@Override
				public long extractAscendingTimestamp(Integer element) {
					return element;
				}
			});

	extractOp
			.transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
			.transform("Timestamp Check",
					BasicTypeInfo.INT_TYPE_INFO,
					new TimestampCheckingOperator());

	// verify that extractor picks up source parallelism
	// (JUnit expects the expected value first, so the source parallelism goes first)
	Assert.assertEquals(source1.getTransformation().getParallelism(), extractOp.getTransformation().getParallelism());

	env.execute();

	// verify that we get NUM_ELEMENTS watermarks: the j-th recorded watermark must be j
	// (elements start at 1, so every watermark trails its element by one)
	for (int j = 0; j < numElements; j++) {
		if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
			long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
			Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
		}
	}

	// the input is finite, so it should have a MAX Watermark
	assertEquals(Watermark.MAX_WATERMARK,
			CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
 
Example 7
Source File: TimestampITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * This tests whether timestamps are properly extracted in the timestamp
 * extractor and whether watermarks are also correctly forwarded from this with the auto watermark
 * interval.
 *
 * <p>The source emits the integers 1..numElements; each integer is used as its own timestamp.
 * After execution the watermarks recorded in {@code CustomOperator.finalWatermarks[0]} are
 * compared against the expected sequence.
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
	final int numElements = 10;

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.getConfig().setAutoWatermarkInterval(10);
	env.setParallelism(1);
	env.getConfig().disableSysoutLogging();

	DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
			int index = 1;
			while (index <= numElements) {
				ctx.collect(index);
				// NOTE(review): latch is declared outside this method; presumably it is
				// released elsewhere so the source advances one element at a time — confirm.
				latch.await();
				index++;
			}
		}

		@Override
		public void cancel() {}
	});

	// each element serves as its own timestamp
	DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(
			new AscendingTimestampExtractor<Integer>() {
				@Override
				public long extractAscendingTimestamp(Integer element) {
					return element;
				}
			});

	extractOp
			.transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
			.transform("Timestamp Check",
					BasicTypeInfo.INT_TYPE_INFO,
					new TimestampCheckingOperator());

	// verify that extractor picks up source parallelism
	// (JUnit expects the expected value first, so the source parallelism goes first)
	Assert.assertEquals(source1.getTransformation().getParallelism(), extractOp.getTransformation().getParallelism());

	env.execute();

	// verify that we get NUM_ELEMENTS watermarks: the j-th recorded watermark must be j
	// (elements start at 1, so every watermark trails its element by one)
	for (int j = 0; j < numElements; j++) {
		if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
			long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
			Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
		}
	}

	// the input is finite, so it should have a MAX Watermark
	assertEquals(Watermark.MAX_WATERMARK,
			CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
 
Example 8
Source File: TurbineHeatProcessor.java    From pravega-samples with Apache License 2.0 4 votes vote down vote up
/**
 * Reads sensor events from a Pravega stream, assigns event-time timestamps, aggregates the
 * events per sensor in tumbling event-time windows, and prints the summaries (optionally also
 * writing them as CSV).
 *
 * @param args command-line arguments; the code reads the {@code input} and {@code output}
 *             keys, and passes all parsed parameters to {@code PravegaConfig.fromParams}
 * @throws Exception if stream creation or job execution fails
 */
public static void main(String[] args) throws Exception {

        ParameterTool params = ParameterTool.fromArgs(args);
        PravegaConfig pravegaConfig = PravegaConfig.fromParams(params).withDefaultScope("examples");

        // make sure the scope and the input stream exist before reading from them
        String inputStreamName = params.get("input", "turbineHeatTest");
        StreamConfiguration streamConfig = StreamConfiguration.builder()
                .scalingPolicy(ScalingPolicy.fixed(1))
                .build();
        Stream stream = Utils.createStream(pravegaConfig, inputStreamName, streamConfig);

        // streaming environment: event time, single parallel instance
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // parallelism 1 is required: on a multi-core machine idle sources keep the watermark
        // from advancing, so the windows would never trigger
        env.setParallelism(1);

        // 1. read the raw sensor events from Pravega and decode them
        FlinkPravegaReader<String> source = FlinkPravegaReader.<String>builder()
                .withPravegaConfig(pravegaConfig)
                .forStream(stream)
                .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class))
                .build();
        DataStream<SensorEvent> events = env
                .addSource(source, "input")
                .map(new SensorMapper())
                .name("events");

        // 2. attach event-time timestamps taken from each event
        BoundedOutOfOrdernessTimestampExtractor<SensorEvent> timestampExtractor =
                new BoundedOutOfOrdernessTimestampExtractor<SensorEvent>(Time.seconds(10)) {
                    @Override
                    public long extractTimestamp(SensorEvent element) {
                        return element.getTimestamp();
                    }
                };
        SingleOutputStreamOperator<SensorEvent> timestamped =
                events.assignTimestampsAndWatermarks(timestampExtractor);

        // 3. aggregate the temperature data per sensor over tumbling event-time windows
        SingleOutputStreamOperator<SensorAggregate> summaries = timestamped
                .keyBy("sensorId")
                .window(TumblingEventTimeWindows.of(Time.days(1), Time.hours(8)))
                .fold(null, new SensorAggregator())
                .name("summaries");

        // 4. print the summaries (see the TaskManager's 'Stdout' view in the Flink UI) and,
        //    when an 'output' parameter is given, also write them there as CSV
        summaries.print().name("stdout");
        if (params.has("output")) {
            summaries.writeAsCsv(params.getRequired("output"), FileSystem.WriteMode.OVERWRITE);
        }

        env.execute("TurbineHeatProcessor_" + stream);
    }
 
Example 9
Source File: TimestampITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * This tests whether timestamps are properly extracted in the timestamp
 * extractor and whether watermarks are also correctly forwarded from this with the auto watermark
 * interval.
 *
 * <p>The source emits the integers 1..numElements; each integer is used as its own timestamp.
 * After execution the watermarks recorded in {@code CustomOperator.finalWatermarks[0]} are
 * compared against the expected sequence.
 */
@Test
public void testTimestampExtractorWithAutoInterval() throws Exception {
	final int numElements = 10;

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.getConfig().setAutoWatermarkInterval(10);
	env.setParallelism(1);

	DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() {
		@Override
		public void run(SourceContext<Integer> ctx) throws Exception {
			int index = 1;
			while (index <= numElements) {
				ctx.collect(index);
				// NOTE(review): latch is declared outside this method; presumably it is
				// released elsewhere so the source advances one element at a time — confirm.
				latch.await();
				index++;
			}
		}

		@Override
		public void cancel() {}
	});

	// each element serves as its own timestamp
	DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks(
			new AscendingTimestampExtractor<Integer>() {
				@Override
				public long extractAscendingTimestamp(Integer element) {
					return element;
				}
			});

	extractOp
			.transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true))
			.transform("Timestamp Check",
					BasicTypeInfo.INT_TYPE_INFO,
					new TimestampCheckingOperator());

	// verify that extractor picks up source parallelism
	// (JUnit expects the expected value first, so the source parallelism goes first)
	Assert.assertEquals(source1.getTransformation().getParallelism(), extractOp.getTransformation().getParallelism());

	env.execute();

	// verify that we get NUM_ELEMENTS watermarks: the j-th recorded watermark must be j
	// (elements start at 1, so every watermark trails its element by one)
	for (int j = 0; j < numElements; j++) {
		if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) {
			long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp();
			Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]);
		}
	}

	// the input is finite, so it should have a MAX Watermark
	assertEquals(Watermark.MAX_WATERMARK,
			CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1));
}
 
Example 10
Source File: BenchmarkJob.java    From scotty-window-processor with Apache License 2.0 2 votes vote down vote up
/**
 * Configures the environment, wires a load-generator source through a Scotty window operator
 * into a discarding sink, and runs the job immediately.
 *
 * @param assigner   the windows to register on the window operator
 * @param env        the execution environment to configure and execute
 * @param runtime    passed to the load-generator source
 * @param throughput passed to the load-generator source and to the throughput logger
 * @param gaps       passed to the load-generator source
 */
public BenchmarkJob(List<Window> assigner, StreamExecutionEnvironment env, final long runtime,
					final int throughput, final List<Tuple2<Long, Long>> gaps) {

	// environment setup: empty global parameters, event time, strictly single-parallel
	Map<String, String> emptyConfig = new HashMap<>();
	env.getConfig().setGlobalJobParameters(ParameterTool.fromMap(emptyConfig));
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);
	env.setMaxParallelism(1);

	// window operator carrying every requested window
	KeyedScottyWindowOperator<Tuple, Tuple4<String, Integer, Long, Long>, Tuple4<String, Integer, Long, Long>> windowOperator =
			new KeyedScottyWindowOperator<>(new SumAggregation());
	for (Window window : assigner) {
		windowOperator.addWindow(window);
	}

	DataStream<Tuple4<String, Integer, Long, Long>> messageStream = env
		.addSource(new de.tub.dima.scotty.flinkBenchmark.LoadGeneratorSource(runtime, throughput,  gaps));

	// side branch that logs throughput; its output stream is not consumed
	messageStream.flatMap(new de.tub.dima.scotty.flinkBenchmark.ThroughputLogger<>(200, throughput));

	// main branch: timestamps/watermarks -> key by field 0 -> windowing -> discarding sink
	messageStream
			.assignTimestampsAndWatermarks(new TimestampsAndWatermarks())
			.keyBy(0)
			.process(windowOperator)
			.addSink(new SinkFunction() {

				@Override
				public void invoke(final Object value) throws Exception {
					// results are intentionally dropped
				}
			});

	try {
		env.execute();
	} catch (Exception e) {
		e.printStackTrace();
	}

}
 
Example 11
Source File: SimpleEdgeStream.java    From gelly-streaming with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a graph from an edge stream that operates in event time, with timestamps
 * supplied by {@code timeExtractor}.
 *
 * <p>As a side effect, the time characteristic of {@code context} is set to event time.
 *
 * @param edges a DataStream of edges.
 * @param timeExtractor the timestamp extractor applied to the edge stream.
 * @param context the execution environment.
 * @see org.apache.flink.streaming.api.TimeCharacteristic
 */
public SimpleEdgeStream(DataStream<Edge<K, EV>> edges, AscendingTimestampExtractor<Edge<K,EV>> timeExtractor, StreamExecutionEnvironment context) {
	this.context = context;
	context.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	this.edges = edges.assignTimestampsAndWatermarks(timeExtractor);
}