Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#print()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#print(). Each example links to its original project and source file, and related API usage is listed on the sidebar.
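Before the project examples, here is a minimal, self-contained sketch of the method itself. The class name PrintDemo is made up for illustration; the API calls are the standard DataStream API. print() attaches a sink that writes each element's toString() to standard output and returns a DataStreamSink; the job still only runs once execute() is called.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class PrintDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // A small bounded stream, just to have something to print
        DataStream<String> words = env.fromElements("hello", "world");

        // Writes each element's toString() to stdout; when the sink runs with
        // parallelism > 1, each output line is prefixed with the producing
        // subtask index, e.g. "2> hello"
        words.print();

        // print() only declares the sink; nothing runs until execute()
        env.execute("DataStream print demo");
    }
}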
Example 1
Source File: KinesisConsumerMain.java    From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));

    DataStream<String> kinesis = env.addSource(new FlinkKinesisConsumer<>(
            "zhisheng",
            new SimpleStringSchema(),
            kinesisConsumerConfig));

    kinesis.print();

    env.execute();
}
 
Example 2
Source File: ConsumeFromKinesis.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool pt = ParameterTool.fromArgs(args);

	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.setParallelism(1);

	Properties kinesisConsumerConfig = new Properties();
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
	kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

	DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
		"flink-test",
		new SimpleStringSchema(),
		kinesisConsumerConfig));

	kinesis.print();

	see.execute();
}
 
Example 3
Source File: StreamWordCountExample.java    From toolbox with Apache License 2.0
public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> dataStream = env
                .fromElements("Who's there?",
                        "I think I hear them. Stand, ho! Who's there?")
                //.socketTextStream("localhost", 9999)
                .flatMap(new Splitter())
                .keyBy(0)
                .sum(1);

        dataStream.print();

        env.execute();

        //env.execute("Socket Stream WordCount");
    }
 
Example 4
Source File: ExactTriangleCount.java    From gelly-streaming with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		SimpleEdgeStream<Integer, NullValue> edges = getGraphStream(env);

		DataStream<Tuple2<Integer, Integer>> result =
				edges.buildNeighborhood(false)
				.map(new ProjectCanonicalEdges())
				.keyBy(0, 1).flatMap(new IntersectNeighborhoods())
				.keyBy(0).flatMap(new SumAndEmitCounters());

		if (resultPath != null) {
			result.writeAsText(resultPath);
		}
		else {
			result.print();
		}

		env.execute("Exact Triangle Count");
	}
 
Example 5
Source File: RollingSum.java    From examples-java with Apache License 2.0
public static void main(String[] args) throws Exception {

        // set up the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple3<Integer, Integer, Integer>> inputStream = env.fromElements(
            Tuple3.of(1, 2, 2), Tuple3.of(2, 3, 1), Tuple3.of(2, 2, 4), Tuple3.of(1, 5, 3));

        DataStream<Tuple3<Integer, Integer, Integer>> resultStream = inputStream
            .keyBy(0) // key on first field of tuples
            .sum(1); // sum the second field of the tuple

        resultStream.print();

        // execute the application
        env.execute();
    }
 
Example 6
Source File: WindowWordCount.java    From flinkDemo with Apache License 2.0
public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> dataStream = env
                //.addSource(new RandomEventSource(5).closeDelay(1500))
                .socketTextStream("localhost", 8080)
                .flatMap(new Splitter())
                .keyBy(0)
                .timeWindow(Time.seconds(5))
                .sum(1);

        dataStream.print();
        env.execute();
    }
 
Example 7
Source File: MatrixVectorMul.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);
		System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

		// Set up the execution environment
		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		final int dimension = params.getInt("dimension", DEFAULT_DIM);
		final int dataSize = params.getInt("data-size", DEFAULT_DATA_SIZE);
		final String resourceName = params.get("resource-name", DEFAULT_RESOURCE_NAME);

		DataStream<List<Float>> result = env.addSource(new RandomVectorSource(dimension, dataSize))
						.map(new Multiplier(dimension, resourceName));

		// Emit result
		if (params.has("output")) {
			result.addSink(StreamingFileSink.forRowFormat(new Path(params.get("output")),
					new SimpleStringEncoder<List<Float>>()).build());
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}
		// Execute program
		env.execute("Matrix-Vector Multiplication");
	}
 
Example 8
Source File: App.java    From Mastering-Apache-Flink with MIT License
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStream<TemperatureEvent> inputEventStream = env.fromElements(new TemperatureEvent("xyz", 22.0),
			new TemperatureEvent("xyz", 20.1), new TemperatureEvent("xyz", 21.1), new TemperatureEvent("xyz", 22.2),
			new TemperatureEvent("xyz", 29.1), new TemperatureEvent("xyz", 22.3), new TemperatureEvent("xyz", 22.1),
			new TemperatureEvent("xyz", 22.4), new TemperatureEvent("xyz", 22.7),
			new TemperatureEvent("xyz", 27.0));

	Pattern<TemperatureEvent, ?> warningPattern = Pattern.<TemperatureEvent> begin("first")
			.subtype(TemperatureEvent.class).where(new FilterFunction<TemperatureEvent>() {
				private static final long serialVersionUID = 1L;

				public boolean filter(TemperatureEvent value) {
					// raise an alert for temperatures of 26.0 or above
					return value.getTemperature() >= 26.0;
				}
			}).within(Time.seconds(10));

	DataStream<Alert> patternStream = CEP.pattern(inputEventStream, warningPattern)
			.select(new PatternSelectFunction<TemperatureEvent, Alert>() {
				private static final long serialVersionUID = 1L;

				public Alert select(Map<String, TemperatureEvent> event) throws Exception {

					return new Alert("Temperature Rise Detected");
				}

			});

	patternStream.print();
	env.execute("CEP on Temperature Sensor");
}
 
Example 9
Source File: WriteIntoKafka.java    From flinkDemo with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Map<String, String> properties = new HashMap<>();
    properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667");
    properties.put("group.id", "t10");
    properties.put("enable.auto.commit", "false");
    properties.put("auto.commit.interval.ms", "1000");
    properties.put("auto.offset.reset", "earliest");
    properties.put("session.timeout.ms", "30000");
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("topic", "kks-topic-FFT");
    //KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(properties);
    // parse user parameters
    //ParameterTool parameterTool = ParameterTool.fromArgs(args);
    ParameterTool parameterTool = ParameterTool.fromMap(properties);

    // add a simple source which is writing some strings
    DataStream<String> messageStream = env.addSource(new SimpleStringGenerator());

    // write stream to Kafka
    messageStream.addSink(new FlinkKafkaProducer010<>(parameterTool.getRequired("bootstrap.servers"),
            parameterTool.getRequired("topic"),
            new SimpleStringSchema()));

    // Note: the result of this rebalance().map(...) chain is discarded, so it
    // has no effect on what is written to Kafka or printed below.
    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    });

    messageStream.print();

    env.execute();
}
 
Example 10
Source File: SideOutput.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Set parallelism to 1
    env.setParallelism(1);

    // Define the OutputTag used to identify the side output
    final OutputTag<String> outputTag = new OutputTag<String>("side-output"){};

    // Create a List containing three Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2<>("aaa", 1));
    list.add(new Tuple2<>("bbb", 2));
    list.add(new Tuple2<>("ccc", 3));

    // Create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // Every element goes to mainDataStream; elements whose f1 field is odd also go to the side output
    SingleOutputStreamOperator<String> mainDataStream = fromCollectionDataStream
            .process(new ProcessFunction<Tuple2<String, Integer>, String>() {
                @Override
                public void processElement(Tuple2<String, Integer> value, Context ctx, Collector<String> out) throws Exception {

                    // Forward to the next operator on the main stream
                    out.collect("main, name : " + value.f0 + ", value : " + value.f1);

                    // Elements with an odd f1 field also go to the side output
                    if(1 == value.f1 % 2) {
                        ctx.output(outputTag, "side, name : " + value.f0 + ", value : " + value.f1);
                    }
                }
            });

    // Disable chaining so the original DAG is clearly visible in the web UI
    mainDataStream.disableChaining();

    // Retrieve the side-output stream
    DataStream<String> sideDataStream = mainDataStream.getSideOutput(outputTag);

    mainDataStream.print();
    sideDataStream.print();

    env.execute("processfunction demo : sideoutput");
}
 
Example 11
Source File: DataStreamTest.java    From flink with Apache License 2.0
@Test
public void testChannelSelectors() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Long> src = env.generateSequence(0, 0);

	DataStream<Long> broadcast = src.broadcast();
	DataStreamSink<Long> broadcastSink = broadcast.print();
	StreamPartitioner<?> broadcastPartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					broadcastSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(broadcastPartitioner instanceof BroadcastPartitioner);

	DataStream<Long> shuffle = src.shuffle();
	DataStreamSink<Long> shuffleSink = shuffle.print();
	StreamPartitioner<?> shufflePartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					shuffleSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(shufflePartitioner instanceof ShufflePartitioner);

	DataStream<Long> forward = src.forward();
	DataStreamSink<Long> forwardSink = forward.print();
	StreamPartitioner<?> forwardPartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					forwardSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(forwardPartitioner instanceof ForwardPartitioner);

	DataStream<Long> rebalance = src.rebalance();
	DataStreamSink<Long> rebalanceSink = rebalance.print();
	StreamPartitioner<?> rebalancePartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					rebalanceSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(rebalancePartitioner instanceof RebalancePartitioner);

	DataStream<Long> global = src.global();
	DataStreamSink<Long> globalSink = global.print();
	StreamPartitioner<?> globalPartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					globalSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(globalPartitioner instanceof GlobalPartitioner);
}
 
Example 12
Source File: StreamingJob.java    From Mastering-Apache-Flink with MIT License
public static void main(String[] args) throws Exception {
	// set up the streaming execution environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// env.enableCheckpointing(5000);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	Properties properties = new Properties();
	properties.setProperty("bootstrap.servers", "localhost:9092");

	properties.setProperty("zookeeper.connect", "localhost:2181");
	properties.setProperty("group.id", "test");

	FlinkKafkaConsumer09<String> myConsumer = new FlinkKafkaConsumer09<>("temp", new SimpleStringSchema(),
			properties);
	myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());

	DataStream<Tuple2<String, Double>> keyedStream = env.addSource(myConsumer).flatMap(new Splitter()).keyBy(0)
			.timeWindow(Time.seconds(300))
			.apply(new WindowFunction<Tuple2<String, Double>, Tuple2<String, Double>, Tuple, TimeWindow>() {

				@Override
				public void apply(Tuple key, TimeWindow window, Iterable<Tuple2<String, Double>> input,
						Collector<Tuple2<String, Double>> out) throws Exception {
					double sum = 0.0;
					int count = 0;
					for (Tuple2<String, Double> record : input) {
						sum += record.f1;
						count++;
					}

					Tuple2<String, Double> result = input.iterator().next();
					result.f1 = (sum/count);
					out.collect(result);

				}
			});

	keyedStream.print();

	// execute program
	env.execute("Flink Streaming Java API Skeleton");
}
 
Example 13
Source File: SessionWindow.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        // Build the input data; the goal is a session gap of 3 time units
        final List<Tuple3<String, Long, Integer>> input = new ArrayList<>();

        input.add(new Tuple3<>("a", 1L, 1));
        input.add(new Tuple3<>("b", 1L, 1));
        input.add(new Tuple3<>("b", 3L, 1));
        input.add(new Tuple3<>("b", 5L, 1));
        input.add(new Tuple3<>("c", 6L, 1));
        // The next occurrences of 'a' and 'c' arrive more than 3 time units after
        // their previous ones, so each should start a new session window
        input.add(new Tuple3<>("a", 10L, 1));
        input.add(new Tuple3<>("c", 11L, 1));

        DataStream<Tuple3<String, Long, Integer>> source = env
                .addSource(new SourceFunction<Tuple3<String, Long, Integer>>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public void run(SourceContext<Tuple3<String, Long, Integer>> ctx) throws Exception {
                        for (Tuple3<String, Long, Integer> value : input) {
                            ctx.collectWithTimestamp(value, value.f1);
                            ctx.emitWatermark(new Watermark(value.f1 - 1));
                        }
                        ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
                    }

                    @Override
                    public void cancel() {
                    }
                });

        // Create session windows with a gap of 3 time units; the timestamps above
        // are plain longs, so the gap must be Time.milliseconds(3L) here
        // (Time.seconds(3L) would put every element of a key into one session)
        DataStream<Tuple3<String, Long, Integer>> aggregated = source
                .keyBy(0)
                .window(EventTimeSessionWindows.withGap(Time.milliseconds(3L)))
                .sum(2);

        System.out.println("Printing result to stdout. Use --output to specify output path.");
        aggregated.print();

        env.execute();
    }
 
Example 14
Source File: DataStreamTest.java    From flink with Apache License 2.0
@Test
public void testChannelSelectors() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Long> src = env.generateSequence(0, 0);

	DataStream<Long> broadcast = src.broadcast();
	DataStreamSink<Long> broadcastSink = broadcast.print();
	StreamPartitioner<?> broadcastPartitioner =
			getStreamGraph(env).getStreamEdges(src.getId(),
					broadcastSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(broadcastPartitioner instanceof BroadcastPartitioner);

	DataStream<Long> shuffle = src.shuffle();
	DataStreamSink<Long> shuffleSink = shuffle.print();
	StreamPartitioner<?> shufflePartitioner =
			getStreamGraph(env).getStreamEdges(src.getId(),
					shuffleSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(shufflePartitioner instanceof ShufflePartitioner);

	DataStream<Long> forward = src.forward();
	DataStreamSink<Long> forwardSink = forward.print();
	StreamPartitioner<?> forwardPartitioner =
			getStreamGraph(env).getStreamEdges(src.getId(),
					forwardSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(forwardPartitioner instanceof ForwardPartitioner);

	DataStream<Long> rebalance = src.rebalance();
	DataStreamSink<Long> rebalanceSink = rebalance.print();
	StreamPartitioner<?> rebalancePartitioner =
			getStreamGraph(env).getStreamEdges(src.getId(),
					rebalanceSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(rebalancePartitioner instanceof RebalancePartitioner);

	DataStream<Long> global = src.global();
	DataStreamSink<Long> globalSink = global.print();
	StreamPartitioner<?> globalPartitioner =
			getStreamGraph(env).getStreamEdges(src.getId(),
					globalSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(globalPartitioner instanceof GlobalPartitioner);
}
 
Example 15
Source File: DataStreamPojoITCase.java    From flink with Apache License 2.0
/**
 * Test composite & nested key on the Data POJO.
 */
@Test
public void testNestedKeyOnNestedPojo() throws Exception {
	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.getConfig().disableObjectReuse();
	see.setParallelism(4);

	DataStream<Data> dataStream = see.fromCollection(elements);

	DataStream<Data> summedStream = dataStream
			.keyBy("aaa", "stats.count")
			.sum("sum")
			.keyBy("aaa", "stats.count")
			.flatMap(new FlatMapFunction<Data, Data>() {
				private static final long serialVersionUID = -3678267280397950258L;
				Data[] first = new Data[3];
				@Override
				public void flatMap(Data value, Collector<Data> out) throws Exception {
					if (value.stats.count != 123) {
						throw new RuntimeException("Wrong value for value.stats.count");
					}
					if (first[value.aaa] == null) {
						first[value.aaa] = value;
						if (value.sum != 1) {
							throw new RuntimeException("Expected the sum to be one");
						}
					} else {
						if (value.sum != 2) {
							throw new RuntimeException("Expected the sum to be two");
						}
						if (first[value.aaa].aaa != value.aaa) {
							throw new RuntimeException("aaa key wrong");
						}
						if (first[value.aaa].abc != value.abc) {
							throw new RuntimeException("abc key wrong");
						}
						if (first[value.aaa].wxyz != value.wxyz) {
							throw new RuntimeException("wxyz key wrong");
						}
					}
				}
			});

	summedStream.print();

	see.execute();
}
 
Example 16
Source File: DataStreamTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testChannelSelectors() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Long> src = env.generateSequence(0, 0);

	DataStream<Long> broadcast = src.broadcast();
	DataStreamSink<Long> broadcastSink = broadcast.print();
	StreamPartitioner<?> broadcastPartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					broadcastSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(broadcastPartitioner instanceof BroadcastPartitioner);

	DataStream<Long> shuffle = src.shuffle();
	DataStreamSink<Long> shuffleSink = shuffle.print();
	StreamPartitioner<?> shufflePartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					shuffleSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(shufflePartitioner instanceof ShufflePartitioner);

	DataStream<Long> forward = src.forward();
	DataStreamSink<Long> forwardSink = forward.print();
	StreamPartitioner<?> forwardPartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					forwardSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(forwardPartitioner instanceof ForwardPartitioner);

	DataStream<Long> rebalance = src.rebalance();
	DataStreamSink<Long> rebalanceSink = rebalance.print();
	StreamPartitioner<?> rebalancePartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					rebalanceSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(rebalancePartitioner instanceof RebalancePartitioner);

	DataStream<Long> global = src.global();
	DataStreamSink<Long> globalSink = global.print();
	StreamPartitioner<?> globalPartitioner =
			env.getStreamGraph().getStreamEdges(src.getId(),
					globalSink.getTransformation().getId()).get(0).getPartitioner();
	assertTrue(globalPartitioner instanceof GlobalPartitioner);
}
 
Example 17
Source File: SessionWindowing.java    From flink with Apache License 2.0
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {

	final ParameterTool params = ParameterTool.fromArgs(args);
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	env.getConfig().setGlobalJobParameters(params);
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
	env.setParallelism(1);

	final boolean fileOutput = params.has("output");

	final List<Tuple3<String, Long, Integer>> input = new ArrayList<>();

	input.add(new Tuple3<>("a", 1L, 1));
	input.add(new Tuple3<>("b", 1L, 1));
	input.add(new Tuple3<>("b", 3L, 1));
	input.add(new Tuple3<>("b", 5L, 1));
	input.add(new Tuple3<>("c", 6L, 1));
	// We expect to detect the session "a" earlier than this point (the old
	// functionality can only detect here when the next starts)
	input.add(new Tuple3<>("a", 10L, 1));
	// We expect to detect session "b" and "c" at this point as well
	input.add(new Tuple3<>("c", 11L, 1));

	DataStream<Tuple3<String, Long, Integer>> source = env
			.addSource(new SourceFunction<Tuple3<String, Long, Integer>>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void run(SourceContext<Tuple3<String, Long, Integer>> ctx) throws Exception {
					for (Tuple3<String, Long, Integer> value : input) {
						ctx.collectWithTimestamp(value, value.f1);
						ctx.emitWatermark(new Watermark(value.f1 - 1));
					}
					ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
				}

				@Override
				public void cancel() {
				}
			});

	// We create sessions for each id with max timeout of 3 time units
	DataStream<Tuple3<String, Long, Integer>> aggregated = source
			.keyBy(0)
			.window(EventTimeSessionWindows.withGap(Time.milliseconds(3L)))
			.sum(2);

	if (fileOutput) {
		aggregated.writeAsText(params.get("output"));
	} else {
		System.out.println("Printing result to stdout. Use --output to specify output path.");
		aggregated.print();
	}

	env.execute();
}
 
Example 18
Source File: DataStreamPojoITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test composite key on the Data POJO (with nested fields).
 */
@Test
public void testCompositeKeyOnNestedPojo() throws Exception {
	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	see.getConfig().disableObjectReuse();
	see.setParallelism(3);

	DataStream<Data> dataStream = see.fromCollection(elements);

	DataStream<Data> summedStream = dataStream
			.keyBy("aaa", "abc", "wxyz")
			.sum("sum")
			.keyBy("aaa", "abc", "wxyz")
			.flatMap(new FlatMapFunction<Data, Data>() {
				private static final long serialVersionUID = 788865239171396315L;
				Data[] first = new Data[3];
				@Override
				public void flatMap(Data value, Collector<Data> out) throws Exception {
					if (first[value.aaa] == null) {
						first[value.aaa] = value;
						if (value.sum != 1) {
							throw new RuntimeException("Expected the sum to be one");
						}
					} else {
						if (value.sum != 2) {
							throw new RuntimeException("Expected the sum to be two");
						}
						if (first[value.aaa].aaa != value.aaa) {
							throw new RuntimeException("aaa key wrong");
						}
						if (first[value.aaa].abc != value.abc) {
							throw new RuntimeException("abc key wrong");
						}
						if (first[value.aaa].wxyz != value.wxyz) {
							throw new RuntimeException("wxyz key wrong");
						}
					}
				}
			});

	summedStream.print();

	see.execute();
}
 
Example 19
Source File: KafkaConsumerTestBase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test that ensures the KafkaConsumer fails properly if the topic doesn't exist
 * and a wrong broker was specified.
 *
 * @throws Exception
 */
public void runFailOnNoBrokerTest() throws Exception {
	try {
		Properties properties = new Properties();

		StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
		see.getConfig().disableSysoutLogging();
		see.setRestartStrategy(RestartStrategies.noRestart());
		see.setParallelism(1);

		// use wrong ports for the consumers
		properties.setProperty("bootstrap.servers", "localhost:80");
		properties.setProperty("zookeeper.connect", "localhost:80");
		properties.setProperty("group.id", "test");
		properties.setProperty("request.timeout.ms", "3000"); // let the test fail fast
		properties.setProperty("socket.timeout.ms", "3000");
		properties.setProperty("session.timeout.ms", "2000");
		properties.setProperty("fetch.max.wait.ms", "2000");
		properties.setProperty("heartbeat.interval.ms", "1000");
		properties.putAll(secureProps);
		FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer("doesntexist", new SimpleStringSchema(), properties);
		DataStream<String> stream = see.addSource(source);
		stream.print();
		see.execute("No broker test");
	} catch (JobExecutionException jee) {
		if (kafkaServer.getVersion().equals("0.9") ||
			kafkaServer.getVersion().equals("0.10") ||
			kafkaServer.getVersion().equals("0.11") ||
			kafkaServer.getVersion().equals("2.0")) {
			assertTrue(jee.getCause() instanceof TimeoutException);

			TimeoutException te = (TimeoutException) jee.getCause();

			assertEquals("Timeout expired while fetching topic metadata", te.getMessage());
		} else {
			assertTrue(jee.getCause() instanceof RuntimeException);

			RuntimeException re = (RuntimeException) jee.getCause();

			assertTrue(re.getMessage().contains("Unable to retrieve any partitions"));
		}
	}
}