Java Code Examples for org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer#setStartFromLatest()

The following examples show how to use org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer#setStartFromLatest(). Each example notes the project and source file it was taken from.
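For orientation, here is a minimal, self-contained sketch of the pattern shared by the examples below: build a FlinkKafkaConsumer, call setStartFromLatest() so a fresh run begins at the tail of the topic, and register the consumer as a source. The topic name, bootstrap servers, and group id are placeholders rather than values from any of the projects; note that the configured start position only takes effect when the job starts without restored state, since offsets restored from a checkpoint or savepoint take precedence.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class StartFromLatestSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker address
        props.setProperty("group.id", "demo-group");              // placeholder consumer group

        // placeholder topic name and a plain String deserialization schema
        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("demo-topic", new SimpleStringSchema(), props);

        // begin at the latest offsets on a fresh start; offsets restored from a
        // checkpoint or savepoint override this setting
        consumer.setStartFromLatest();

        env.addSource(consumer).print();
        env.execute("setStartFromLatest sketch");
    }
}
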
Example 1
Source File: KafkaSourceProvider.java    From flink-statefun with Apache License 2.0
private static <T> void configureStartupPosition(
    FlinkKafkaConsumer<T> consumer, KafkaIngressStartupPosition startupPosition) {
  if (startupPosition.isGroupOffsets()) {
    consumer.setStartFromGroupOffsets();
  } else if (startupPosition.isEarliest()) {
    consumer.setStartFromEarliest();
  } else if (startupPosition.isLatest()) {
    consumer.setStartFromLatest();
  } else if (startupPosition.isSpecificOffsets()) {
    KafkaIngressStartupPosition.SpecificOffsetsPosition offsetsPosition =
        startupPosition.asSpecificOffsets();
    consumer.setStartFromSpecificOffsets(
        convertKafkaTopicPartitionMap(offsetsPosition.specificOffsets()));
  } else if (startupPosition.isDate()) {
    KafkaIngressStartupPosition.DatePosition datePosition = startupPosition.asDate();
    consumer.setStartFromTimestamp(datePosition.epochMilli());
  } else {
    throw new IllegalStateException("Safe guard; should not occur");
  }
}
 
Example 2
Source File: KafkaItemTransactionJob.java    From flink-tutorials with Apache License 2.0
public DataStream<Query> readQueryStream(ParameterTool params, StreamExecutionEnvironment env) {
	// We read queries in a simple String format and parse them into our Query objects
	FlinkKafkaConsumer<Query> rawQuerySource = new FlinkKafkaConsumer<>(
			params.getRequired(QUERY_INPUT_TOPIC_KEY), new QuerySchema(),
			Utils.readKafkaProperties(params, true));

	rawQuerySource.setCommitOffsetsOnCheckpoints(true);

	// The first time the job is started, we start from the end of the queue, ignoring earlier queries
	rawQuerySource.setStartFromLatest();

	return env.addSource(rawQuerySource)
			.name("Kafka Query Source")
			.uid("Kafka Query Source");
}
 
Example 3
Source File: KafkaSourceBuilder.java    From Alink with Apache License 2.0
@Override
public RichParallelSourceFunction<Row> build() {
    FlinkKafkaConsumer<Row> consumer;
    if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
        Pattern pattern = Pattern.compile(topicPattern);
        consumer = new FlinkKafkaConsumer<Row>(pattern, new MessageDeserialization(), properties);
    } else {
        consumer = new FlinkKafkaConsumer<Row>(topic, new MessageDeserialization(), properties);
    }

    switch (super.startupMode) {
        case LATEST: {
            consumer.setStartFromLatest();
            break;
        }
        case EARLIEST: {
            consumer.setStartFromEarliest();
            break;
        }
        case GROUP_OFFSETS: {
            consumer.setStartFromGroupOffsets();
            break;
        }
        case TIMESTAMP: {
            consumer.setStartFromTimestamp(startTimeMs);
            break;
        }
        default: {
            throw new IllegalArgumentException("invalid startupMode.");
        }
    }

    return consumer;
}
 
Example 4
Source File: Kafka240String.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism
    env.setParallelism(2);

    Properties properties = new Properties();
    // Kafka broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new SimpleStringSchema(),
            properties
    );
    // start consuming from the latest offsets, effectively discarding earlier messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via the addSource method
    DataStream<String> dataStream = env.addSource(flinkKafkaConsumer);

    // split the string messages from Kafka into words and count them over 5-second windows
    dataStream
            .flatMap(new Splitter())
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1)
            .print();

    env.execute("Connector DataSource demo : kafka");
}
 
Example 5
Source File: Kafka240Bean.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism
    env.setParallelism(2);

    Properties properties = new Properties();
    // Kafka broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<Student> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new StudentSchema(),
            properties
    );
    // start consuming from the latest offsets, effectively discarding earlier messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via the addSource method
    DataStream<Student> dataStream = env.addSource(flinkKafkaConsumer);

    // the JSON from Kafka is deserialized into Student instances; count occurrences of each name over 5-second windows
    dataStream.map(new MapFunction<Student, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> map(Student student) throws Exception {
            return new Tuple2<>(student.getName(), 1);
        }
    })
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1)
            .print();

    env.execute("Connector DataSource demo : kafka bean");
}
 
Example 6
Source File: CassandraPojoSink.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the parallelism
    env.setParallelism(1);

    // properties used to connect to Kafka
    Properties properties = new Properties();
    // Kafka broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new SimpleStringSchema(),
            properties
    );

    // start consuming from the latest offsets, effectively discarding earlier messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via the addSource method
    DataStream<String> dataStream = env.addSource(flinkKafkaConsumer);

    DataStream<WordCount> result = dataStream
            .flatMap(new FlatMapFunction<String, WordCount>() {
                @Override
                public void flatMap(String s, Collector<WordCount> collector) throws Exception {
                    String[] words = s.toLowerCase().split("\\s");

                    for (String word : words) {
                        if (!word.isEmpty()) {
                            // in the Cassandra table, word is the primary key, so it must not be empty
                            collector.collect(new WordCount(word, 1L));
                        }
                    }
                }
            })
            .keyBy("word")
            .timeWindow(Time.seconds(5))
            .reduce(new ReduceFunction<WordCount>() {
                @Override
                public WordCount reduce(WordCount wordCount, WordCount t1) throws Exception {
                    return new WordCount(wordCount.getWord(), wordCount.getCount() + t1.getCount());
                }
            });

    result.addSink(new PrintSinkFunction<>())
            .name("print Sink")
            .disableChaining();

    CassandraSink.addSink(result)
            .setHost("192.168.133.168")
            .setMapperOptions(() -> new Mapper.Option[] { Mapper.Option.saveNullFields(true) })
            .build()
            .name("cassandra Sink")
            .disableChaining();

    env.execute("kafka-2.4 source, cassandra-3.11.6 sink, pojo");
}
 
Example 7
Source File: CassandraTuple2Sink.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the parallelism
    env.setParallelism(1);

    // properties used to connect to Kafka
    Properties properties = new Properties();
    // Kafka broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new SimpleStringSchema(),
            properties
    );

    // start consuming from the latest offsets, effectively discarding earlier messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via the addSource method
    DataStream<String> dataStream = env.addSource(flinkKafkaConsumer);

    DataStream<Tuple2<String, Long>> result = dataStream
            .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                         @Override
                         public void flatMap(String value, Collector<Tuple2<String, Long>> out) {
                             String[] words = value.toLowerCase().split("\\s");

                             for (String word : words) {
                                 // in the Cassandra table, word is the primary key, so it must not be empty
                                 if (!word.isEmpty()) {
                                     out.collect(new Tuple2<String, Long>(word, 1L));
                                 }
                             }
                         }
                     }
            )
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1);

    result.addSink(new PrintSinkFunction<>())
            .name("print Sink")
            .disableChaining();

    CassandraSink.addSink(result)
            .setQuery("INSERT INTO example.wordcount(word, count) values (?, ?);")
            .setHost("192.168.133.168")
            .build()
            .name("cassandra Sink")
            .disableChaining();

    env.execute("kafka-2.4 source, cassandra-3.11.6 sink, tuple2");
}