org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer. Each example is extracted from an open-source project; the project and source file are listed above each code block.
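
Before the project examples, here is a minimal, hedged sketch of the basic construction pattern: a FlinkKafkaConsumer takes a topic name, a DeserializationSchema, and a java.util.Properties object with at least bootstrap.servers and group.id set. The topic name, broker address, and group id below are placeholder values, not taken from any of the projects listed.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class FlinkKafkaConsumerSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Minimal Kafka client configuration; the address and group id are placeholders.
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "example-group");

        // Topic name, deserialization schema, and client properties.
        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("example-topic", new SimpleStringSchema(), props);

        // Optional: choose a startup position (several of the examples below do the same).
        consumer.setStartFromGroupOffsets();

        DataStream<String> stream = env.addSource(consumer).name("Kafka Source");
        stream.print();

        env.execute("FlinkKafkaConsumer sketch");
    }
}
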
Example #1
Source File: KafkaExample.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}
 
Example #2
Source File: KafkaExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}
 
Example #3
Source File: SpanCountJob.java    From Mastering-Distributed-Tracing with MIT License
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "tracefeatures");

    FlinkKafkaConsumer<Span> consumer = new FlinkKafkaConsumer<>(//
            "jaeger-spans", //
            new ProtoUnmarshaler(), properties);

    // replay Kafka stream from beginning, useful for testing
    consumer.setStartFromEarliest();

    DataStream<Span> spans = env.addSource(consumer).name("spans");
    DataStream<Trace> traces = aggregateSpansToTraces(spans);
    DataStream<TraceSummary> spanCounts = countSpansByService(traces);

    spanCounts.print();
    spanCounts.addSink(ESSink.build());

    // execute program
    env.execute("Span Count Job");
}
 
Example #4
Source File: RoutableProtobufKafkaSourceProviderTest.java    From flink-statefun with Apache License 2.0
@Test
public void exampleUsage() {
  JsonNode ingressDefinition =
      loadAsJsonFromClassResource(
          getClass().getClassLoader(), "routable-protobuf-kafka-ingress.yaml");
  JsonIngressSpec<?> spec =
      new JsonIngressSpec<>(
          ProtobufKafkaIngressTypes.ROUTABLE_PROTOBUF_KAFKA_INGRESS_TYPE,
          new IngressIdentifier<>(Message.class, "foo", "bar"),
          ingressDefinition);

  RoutableProtobufKafkaSourceProvider provider = new RoutableProtobufKafkaSourceProvider();
  SourceFunction<?> source = provider.forSpec(spec);

  assertThat(source, instanceOf(FlinkKafkaConsumer.class));
}
 
Example #5
Source File: KafkaSourceProvider.java    From flink-statefun with Apache License 2.0
private static <T> void configureStartupPosition(
    FlinkKafkaConsumer<T> consumer, KafkaIngressStartupPosition startupPosition) {
  if (startupPosition.isGroupOffsets()) {
    consumer.setStartFromGroupOffsets();
  } else if (startupPosition.isEarliest()) {
    consumer.setStartFromEarliest();
  } else if (startupPosition.isLatest()) {
    consumer.setStartFromLatest();
  } else if (startupPosition.isSpecificOffsets()) {
    KafkaIngressStartupPosition.SpecificOffsetsPosition offsetsPosition =
        startupPosition.asSpecificOffsets();
    consumer.setStartFromSpecificOffsets(
        convertKafkaTopicPartitionMap(offsetsPosition.specificOffsets()));
  } else if (startupPosition.isDate()) {
    KafkaIngressStartupPosition.DatePosition datePosition = startupPosition.asDate();
    consumer.setStartFromTimestamp(datePosition.epochMilli());
  } else {
    throw new IllegalStateException("Safe guard; should not occur");
  }
}
 
Example #6
Source File: KafkaItemTransactionJob.java    From flink-tutorials with Apache License 2.0
public DataStream<ItemTransaction> readTransactionStream(ParameterTool params, StreamExecutionEnvironment env) {
	// We read the ItemTransaction objects directly using the schema
	FlinkKafkaConsumer<ItemTransaction> transactionSource = new FlinkKafkaConsumer<>(
			params.getRequired(TRANSACTION_INPUT_TOPIC_KEY), new TransactionSchema(),
			Utils.readKafkaProperties(params, true));

	transactionSource.setCommitOffsetsOnCheckpoints(true);
	transactionSource.setStartFromEarliest();

	// In case event time processing is enabled we assign trailing watermarks for each partition
	transactionSource.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ItemTransaction>(Time.minutes(1)) {
		@Override
		public long extractTimestamp(ItemTransaction transaction) {
			return transaction.ts;
		}
	});

	return env.addSource(transactionSource)
			.name("Kafka Transaction Source")
			.uid("Kafka Transaction Source");
}
 
Example #7
Source File: KafkaToHDFSSimpleJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {

		ParameterTool params = Utils.parseArgs(args);
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(params.getRequired("kafkaTopic"), new SimpleStringSchema(), Utils.readKafkaProperties(params));
		DataStream<String> source = env.addSource(consumer).name("Kafka Source").uid("Kafka Source");

		StreamingFileSink<String> sink = StreamingFileSink
				.forRowFormat(new Path(params.getRequired("hdfsOutput")), new SimpleStringEncoder<String>("UTF-8"))
				.build();

		source.addSink(sink).name("FS Sink").uid("FS Sink");
		source.print();

		env.execute("Flink Streaming Secured Job Sample");
	}
 
Example #8
Source File: ProtobufKafkaSourceProviderTest.java    From stateful-functions with Apache License 2.0
@Test
public void exampleUsage() {
  JsonNode ingressDefinition = fromPath("protobuf-kafka-ingress.yaml");
  JsonIngressSpec<?> spec =
      new JsonIngressSpec<>(
          Constants.PROTOBUF_KAFKA_INGRESS_TYPE,
          new IngressIdentifier<>(Message.class, "foo", "bar"),
          ingressDefinition);

  ProtobufKafkaSourceProvider provider = new ProtobufKafkaSourceProvider();
  SourceFunction<?> source = provider.forSpec(spec);

  assertThat(source, instanceOf(FlinkKafkaConsumer.class));
}
 
Example #9
Source File: KafkaToHDFSAvroJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {

        ParameterTool params = Utils.parseArgs(args);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaDeserializationSchema<Message> schema = ClouderaRegistryKafkaDeserializationSchema
                .builder(Message.class)
                .setConfig(Utils.readSchemaRegistryProperties(params))
                .build();

        FlinkKafkaConsumer<Message> consumer = new FlinkKafkaConsumer<Message>(params.getRequired(K_KAFKA_TOPIC), schema, Utils.readKafkaProperties(params));

        DataStream<String> source = env.addSource(consumer)
                .name("Kafka Source")
                .uid("Kafka Source")
                .map(record -> record.getId() + "," + record.getName() + "," + record.getDescription())
                .name("ToOutputString");

        StreamingFileSink<String> sink = StreamingFileSink
                .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder<String>("UTF-8"))
                .build();

        source.addSink(sink)
                .name("FS Sink")
                .uid("FS Sink");

        source.print();

        env.execute("Flink Streaming Secured Job Sample");
    }
 
Example #10
Source File: ProtobufKafkaSourceProvider.java    From stateful-functions with Apache License 2.0
@Override
public <T> SourceFunction<T> forSpec(IngressSpec<T> spec) {
  JsonNode json = asJsonIngressSpec(spec);
  Properties properties = kafkaClientProperties(json);
  List<String> topics = Selectors.textListAt(json, TOPICS_POINTER);
  KafkaDeserializationSchema<T> deserializationSchema = deserializationSchema(json);
  return new FlinkKafkaConsumer<>(topics, deserializationSchema, properties);
}
 
Example #11
Source File: KafkaItemTransactionJob.java    From flink-tutorials with Apache License 2.0
public DataStream<Query> readQueryStream(ParameterTool params, StreamExecutionEnvironment env) {
	// We read queries in a simple String format and parse it to our Query object
	FlinkKafkaConsumer<Query> rawQuerySource = new FlinkKafkaConsumer<>(
			params.getRequired(QUERY_INPUT_TOPIC_KEY), new QuerySchema(),
			Utils.readKafkaProperties(params, true));

	rawQuerySource.setCommitOffsetsOnCheckpoints(true);

	// The first time the job is started we start from the end of the queue, ignoring earlier queries
	rawQuerySource.setStartFromLatest();

	return env.addSource(rawQuerySource)
			.name("Kafka Query Source")
			.uid("Kafka Query Source");
}
 
Example #12
Source File: KafkaSourceProvider.java    From stateful-functions with Apache License 2.0
@Override
public <T> SourceFunction<T> forSpec(IngressSpec<T> ingressSpec) {
  KafkaIngressSpec<T> spec = asKafkaSpec(ingressSpec);

  Properties properties = new Properties();
  properties.putAll(spec.properties());
  properties.put("bootstrap.servers", spec.kafkaAddress());

  return new FlinkKafkaConsumer<>(spec.topics(), deserializationSchemaFromSpec(spec), properties);
}
 
Example #13
Source File: KafkaSourceProvider.java    From flink-statefun with Apache License 2.0
@Override
public <T> SourceFunction<T> forSpec(IngressSpec<T> ingressSpec) {
  KafkaIngressSpec<T> spec = asKafkaSpec(ingressSpec);

  FlinkKafkaConsumer<T> consumer =
      new FlinkKafkaConsumer<>(
          spec.topics(), deserializationSchemaFromSpec(spec), spec.properties());
  configureStartupPosition(consumer, spec.startupPosition());
  return consumer;
}
 
Example #14
Source File: ClickEventCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool params = ParameterTool.fromArgs(args);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	configureEnvironment(params, env);

	String inputTopic = params.get("input-topic", "input");
	String outputTopic = params.get("output-topic", "output");
	String brokers = params.get("bootstrap.servers", "localhost:9092");
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
	kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

	env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
		.name("ClickEvent Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
			@Override
			public long extractTimestamp(final ClickEvent element) {
				return element.getTimestamp().getTime();
			}
		})
		.keyBy(ClickEvent::getPage)
		.timeWindow(WINDOW_SIZE)
		.aggregate(new CountingAggregator(),
			new ClickEventStatisticsCollector())
		.name("ClickEvent Counter")
		.addSink(new FlinkKafkaProducer<>(
			outputTopic,
			new ClickEventStatisticsSerializationSchema(outputTopic),
			kafkaProps,
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
		.name("ClickEventStatistics Sink");

	env.execute("Click Event Count");
}
 
Example #15
Source File: ProtobufKafkaSourceProviderTest.java    From flink-statefun with Apache License 2.0
@Test
public void exampleUsage() {
  JsonNode ingressDefinition =
      loadAsJsonFromClassResource(getClass().getClassLoader(), "protobuf-kafka-ingress.yaml");
  JsonIngressSpec<?> spec =
      new JsonIngressSpec<>(
          ProtobufKafkaIngressTypes.PROTOBUF_KAFKA_INGRESS_TYPE,
          new IngressIdentifier<>(Message.class, "foo", "bar"),
          ingressDefinition);

  ProtobufKafkaSourceProvider provider = new ProtobufKafkaSourceProvider();
  SourceFunction<?> source = provider.forSpec(spec);

  assertThat(source, instanceOf(FlinkKafkaConsumer.class));
}
 
Example #16
Source File: KafkaDataSource.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // configure the Kafka connection parameters
        String topic = "flink";
        String bootStrapServers = "192.168.56.103:9092";
        String zkConnect = "192.168.56.103:2181";
        String groupID = "group_A";
        Properties prop = new Properties();
        prop.setProperty("bootstrap.servers", bootStrapServers);
//        prop.setProperty("zookeeper.connect", zkConnect);
        prop.setProperty("group.id", groupID);

        // create the Kafka connector source
//        FlinkKafkaConsumer010<String> consumer010 = new FlinkKafkaConsumer010<>(topic, new SimpleStringSchema(), prop);
        FlinkKafkaConsumer<String> stringFlinkKafkaConsumer = new FlinkKafkaConsumer<>(topic, new SimpleStringSchema(), prop);

        // add source
        DataStreamSource<String> dataStream = env.addSource(stringFlinkKafkaConsumer);

        dataStream.print();

        env.execute("Flink kafka test");
    }
 
Example #17
Source File: KafkaSourceBuilder.java    From Alink with Apache License 2.0
@Override
public RichParallelSourceFunction<Row> build() {
    FlinkKafkaConsumer<Row> consumer;
    if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
        Pattern pattern = Pattern.compile(topicPattern);
        consumer = new FlinkKafkaConsumer<Row>(pattern, new MessageDeserialization(), properties);
    } else {
        consumer = new FlinkKafkaConsumer<Row>(topic, new MessageDeserialization(), properties);
    }

    switch (super.startupMode) {
        case LATEST: {
            consumer.setStartFromLatest();
            break;
        }
        case EARLIEST: {
            consumer.setStartFromEarliest();
            break;
        }
        case GROUP_OFFSETS: {
            consumer.setStartFromGroupOffsets();
            break;
        }
        case TIMESTAMP: {
            consumer.setStartFromTimestamp(startTimeMs);
            break;
        }
        default: {
            throw new IllegalArgumentException("invalid startupMode.");
        }
    }

    return consumer;
}
 
Example #18
Source File: Kafka240String.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism
    env.setParallelism(2);

    Properties properties = new Properties();
    // broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new SimpleStringSchema(),
            properties
    );
    // start consuming from the latest offset, i.e. skip historical messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via addSource
    DataStream<String> dataStream = env.addSource(flinkKafkaConsumer);

    // split the strings read from Kafka into words and count them in 5-second windows
    dataStream
            .flatMap(new Splitter())
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1)
            .print();

    env.execute("Connector DataSource demo : kafka");
}
 
Example #19
Source File: Kafka240Bean.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set the parallelism
    env.setParallelism(2);

    Properties properties = new Properties();
    // broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<Student> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new StudentSchema(),
            properties
    );
    // start consuming from the latest offset, i.e. skip historical messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via addSource
    DataStream<Student> dataStream = env.addSource(flinkKafkaConsumer);

    // the JSON read from Kafka is deserialized into Student instances; count each name in 5-second windows
    dataStream.map(new MapFunction<Student, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> map(Student student) throws Exception {
            return new Tuple2<>(student.getName(), 1);
        }
    })
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1)
            .print();

    env.execute("Connector DataSource demo : kafka bean");
}
 
Example #20
Source File: KafkaDynamicSource.java    From flink with Apache License 2.0
@Override
protected FlinkKafkaConsumerBase<RowData> createKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	return new FlinkKafkaConsumer<>(topic, deserializationSchema, properties);
}
 
Example #21
Source File: FlinkKafkaReader.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
	final ParameterTool pt = ParameterTool.fromArgs(args);

	DataStreamSource<Event> src = see.addSource(new FlinkKafkaConsumer<Event>(pt.getRequired("topic"), new EventDeSerializer(), pt.getProperties()));

	src.flatMap(new FlatMapFunction<Event, Integer>() {
		long received = 0;
		int logfreq = pt.getInt("logfreq");
		long lastLog = -1;
		long lastElements = 0;

		@Override
		public void flatMap(Event event, Collector<Integer> collector) throws Exception {


			// print some stats
			received++;
			if (received % logfreq == 0) {
				// throughput over entire time
				long now = System.currentTimeMillis();

				// throughput for the last "logfreq" elements
				if(lastLog == -1) {
					// init (the first)
					lastLog = now;
					lastElements = received;
				} else {
					long timeDiff = now - lastLog;
					long elementDiff = received - lastElements;
					double ex = (1000/(double)timeDiff);
					LOG.info("During the last {} ms, we received {} elements. That's {} elements/second/core. Total read {}", timeDiff, elementDiff, elementDiff*ex, received);
					// reinit
					lastLog = now;
					lastElements = received;
				}
			}
		}
	});

	see.execute();
}
 
Example #22
Source File: KafkaDynamicTableFactoryTest.java    From flink with Apache License 2.0
@Override
protected Class<?> getExpectedConsumerClass() {
	return FlinkKafkaConsumer.class;
}
 
Example #23
Source File: CassandraTuple2Sink.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the parallelism
    env.setParallelism(1);

    // properties used for connecting to Kafka
    Properties properties = new Properties();
    // broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new SimpleStringSchema(),
            properties
    );

    // start consuming from the latest offset, i.e. skip historical messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via addSource
    DataStream<String> dataStream = env.addSource(flinkKafkaConsumer);

    DataStream<Tuple2<String, Long>> result = dataStream
            .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
                         @Override
                         public void flatMap(String value, Collector<Tuple2<String, Long>> out) {
                             String[] words = value.toLowerCase().split("\\s");

                             for (String word : words) {
                                 // in the Cassandra table each word is a primary key, so it must not be empty
                                 if (!word.isEmpty()) {
                                     out.collect(new Tuple2<String, Long>(word, 1L));
                                 }
                             }
                         }
                     }
            )
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1);

    result.addSink(new PrintSinkFunction<>())
            .name("print Sink")
            .disableChaining();

    CassandraSink.addSink(result)
            .setQuery("INSERT INTO example.wordcount(word, count) values (?, ?);")
            .setHost("192.168.133.168")
            .build()
            .name("cassandra Sink")
            .disableChaining();

    env.execute("kafka-2.4 source, cassandra-3.11.6 sink, tuple2");
}
 
Example #24
Source File: CassandraPojoSink.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the parallelism
    env.setParallelism(1);

    // properties used for connecting to Kafka
    Properties properties = new Properties();
    // broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");
    // ZooKeeper address
    properties.setProperty("zookeeper.connect", "192.168.50.43:2181");
    // consumer group id
    properties.setProperty("group.id", "flink-connector");
    // instantiate the consumer
    FlinkKafkaConsumer<String> flinkKafkaConsumer = new FlinkKafkaConsumer<>(
            "test001",
            new SimpleStringSchema(),
            properties
    );

    // start consuming from the latest offset, i.e. skip historical messages
    flinkKafkaConsumer.setStartFromLatest();

    // obtain the DataStream via addSource
    DataStream<String> dataStream = env.addSource(flinkKafkaConsumer);

    DataStream<WordCount> result = dataStream
            .flatMap(new FlatMapFunction<String, WordCount>() {
                @Override
                public void flatMap(String s, Collector<WordCount> collector) throws Exception {
                    String[] words = s.toLowerCase().split("\\s");

                    for (String word : words) {
                        if (!word.isEmpty()) {
                            // in the Cassandra table each word is a primary key, so it must not be empty
                            collector.collect(new WordCount(word, 1L));
                        }
                    }
                }
            })
            .keyBy("word")
            .timeWindow(Time.seconds(5))
            .reduce(new ReduceFunction<WordCount>() {
                @Override
                public WordCount reduce(WordCount wordCount, WordCount t1) throws Exception {
                    return new WordCount(wordCount.getWord(), wordCount.getCount() + t1.getCount());
                }
            });

    result.addSink(new PrintSinkFunction<>())
            .name("print Sink")
            .disableChaining();

    CassandraSink.addSink(result)
            .setHost("192.168.133.168")
            .setMapperOptions(() -> new Mapper.Option[] { Mapper.Option.saveNullFields(true) })
            .build()
            .name("cassandra Sink")
            .disableChaining();

    env.execute("kafka-2.4 source, cassandra-3.11.6 sink, pojo");
}
 
Example #25
Source File: ClickEventCount.java    From flink-playgrounds with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool params = ParameterTool.fromArgs(args);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	configureEnvironment(params, env);

	boolean inflictBackpressure = params.has(BACKPRESSURE_OPTION);

	String inputTopic = params.get("input-topic", "input");
	String outputTopic = params.get("output-topic", "output");
	String brokers = params.get("bootstrap.servers", "localhost:9092");
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
	kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

	DataStream<ClickEvent> clicks =
			env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
		.name("ClickEvent Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
			@Override
			public long extractTimestamp(final ClickEvent element) {
				return element.getTimestamp().getTime();
			}
		});

	if (inflictBackpressure) {
		// Force a network shuffle so that the backpressure will affect the buffer pools
		clicks = clicks
			.keyBy(ClickEvent::getPage)
			.map(new BackpressureMap())
			.name("Backpressure");
	}

	DataStream<ClickEventStatistics> statistics = clicks
		.keyBy(ClickEvent::getPage)
		.timeWindow(WINDOW_SIZE)
		.aggregate(new CountingAggregator(),
			new ClickEventStatisticsCollector())
		.name("ClickEvent Counter");

	statistics
		.addSink(new FlinkKafkaProducer<>(
			outputTopic,
			new ClickEventStatisticsSerializationSchema(outputTopic),
			kafkaProps,
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
		.name("ClickEventStatistics Sink");

	env.execute("Click Event Count");
}