org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer. Each example notes its source file and the open source project and license it was taken from.
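Before the project examples, here is a minimal, self-contained sketch of the common pattern they all follow; the topic name, broker address, and job name below are placeholders rather than values taken from any of the listed projects:

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class MinimalKafkaSink {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		Properties props = new Properties();
		props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker address

		// Simplest constructor: target topic, a SerializationSchema, and producer properties.
		FlinkKafkaProducer<String> producer =
				new FlinkKafkaProducer<>("demo-topic", new SimpleStringSchema(), props);

		env.fromElements("aaa", "bbb", "ccc")
			.addSink(producer)
			.name("Kafka Sink");

		env.execute("Minimal FlinkKafkaProducer example");
	}
}
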
Example #1
Source File: KafkaExample.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}
 
Example #2
Source File: KafkaExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	// parse input arguments
	final ParameterTool parameterTool = ParameterTool.fromArgs(args);
	StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

	DataStream<KafkaEvent> input = env
		.addSource(
			new FlinkKafkaConsumer<>(
				parameterTool.getRequired("input-topic"),
				new KafkaEventSchema(),
				parameterTool.getProperties())
				.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
		.keyBy("word")
		.map(new RollingAdditionMapper());

	input.addSink(
		new FlinkKafkaProducer<>(
			parameterTool.getRequired("output-topic"),
			new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
			parameterTool.getProperties(),
			FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

	env.execute("Modern Kafka Example");
}
 
Example #3
Source File: KafkaSinkProvider.java    From stateful-functions with Apache License 2.0
@Override
public <T> SinkFunction<T> forSpec(EgressSpec<T> egressSpec) {
  KafkaEgressSpec<T> spec = asSpec(egressSpec);

  Properties properties = new Properties();
  properties.putAll(spec.properties());
  properties.put("bootstrap.servers", spec.kafkaAddress());

  Semantic producerSemantic = semanticFromSpec(spec);
  if (producerSemantic == Semantic.EXACTLY_ONCE) {
    properties.put("transaction.timeout.ms", spec.transactionTimeoutDuration().toMillis());
  }

  return new FlinkKafkaProducer<>(
      randomKafkaTopic(),
      serializerFromSpec(spec),
      properties,
      producerSemantic,
      spec.kafkaProducerPoolSize());
}
 
Example #4
Source File: AvroDataGeneratorJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
	ParameterTool params = Utils.parseArgs(args);
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	KafkaSerializationSchema<Message> schema = ClouderaRegistryKafkaSerializationSchema.<Message>
			builder(params.getRequired(K_KAFKA_TOPIC))
			.setConfig(Utils.readSchemaRegistryProperties(params))
			.setKey(Message::getId)
			.build();

	FlinkKafkaProducer<Message> kafkaSink = new FlinkKafkaProducer<>(
			"default", schema, Utils.readKafkaProperties(params), FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

	DataStream<Message> input = env.addSource(new DataGeneratorSource()).name("Data Generator Source");

	input.addSink(kafkaSink)
			.name("Kafka Sink")
			.uid("Kafka Sink");

	input.print();

	env.execute("Data Generator Job");
}
 
Example #5
Source File: KafkaSinkProvider.java    From flink-statefun with Apache License 2.0
@Override
public <T> SinkFunction<T> forSpec(EgressSpec<T> egressSpec) {
  KafkaEgressSpec<T> spec = asSpec(egressSpec);

  Properties properties = new Properties();
  properties.putAll(spec.properties());
  properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, spec.kafkaAddress());

  Semantic producerSemantic = semanticFromSpec(spec);
  if (producerSemantic == Semantic.EXACTLY_ONCE) {
    properties.setProperty(
        ProducerConfig.TRANSACTION_TIMEOUT_CONFIG,
        String.valueOf(spec.transactionTimeoutDuration().toMillis()));
  }

  return new FlinkKafkaProducer<>(
      randomKafkaTopic(),
      serializerFromSpec(spec),
      properties,
      producerSemantic,
      spec.kafkaProducerPoolSize());
}
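
One operational note on the EXACTLY_ONCE branch above: Kafka brokers cap producer transactions via transaction.max.timeout.ms (15 minutes by default), so the transaction timeout handed to an exactly-once FlinkKafkaProducer should stay at or below that cap. A hedged illustration of such a property setup (broker address is a placeholder):

Properties props = new Properties();
props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
// Keep the producer transaction timeout within the broker's transaction.max.timeout.ms.
props.setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, String.valueOf(15 * 60 * 1000));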
 
Example #6
Source File: ClickEventCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool params = ParameterTool.fromArgs(args);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	configureEnvironment(params, env);

	String inputTopic = params.get("input-topic", "input");
	String outputTopic = params.get("output-topic", "output");
	String brokers = params.get("bootstrap.servers", "localhost:9092");
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
	kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

	env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
		.name("ClickEvent Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
			@Override
			public long extractTimestamp(final ClickEvent element) {
				return element.getTimestamp().getTime();
			}
		})
		.keyBy(ClickEvent::getPage)
		.timeWindow(WINDOW_SIZE)
		.aggregate(new CountingAggregator(),
			new ClickEventStatisticsCollector())
		.name("ClickEvent Counter")
		.addSink(new FlinkKafkaProducer<>(
			outputTopic,
			new ClickEventStatisticsSerializationSchema(outputTopic),
			kafkaProps,
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
		.name("ClickEventStatistics Sink");

	env.execute("Click Event Count");
}
 
Example #7
Source File: KafkaItemTransactionJob.java    From flink-tutorials with Apache License 2.0
public void writeQueryOutput(ParameterTool params, DataStream<QueryResult> queryResultStream) {
	// Query output is written back to Kafka in a tab-delimited format for readability
	FlinkKafkaProducer<QueryResult> queryOutputSink = new FlinkKafkaProducer<>(
			params.getRequired(QUERY_OUTPUT_TOPIC_KEY), new QueryResultSchema(),
			Utils.readKafkaProperties(params, false),
			Optional.of(new HashingKafkaPartitioner<>()));

	queryResultStream
			.addSink(queryOutputSink)
			.name("Kafka Query Result Sink")
			.uid("Kafka Query Result Sink");
}
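
The HashingKafkaPartitioner referenced above belongs to the flink-tutorials project and is not shown here; as a rough, hypothetical sketch (not the tutorial's actual implementation), a key-hash based FlinkKafkaPartitioner could look like this:

import java.util.Arrays;

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

// Hypothetical sketch; the real HashingKafkaPartitioner in flink-tutorials may differ.
public class HashingKafkaPartitioner<T> extends FlinkKafkaPartitioner<T> {
	@Override
	public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
		// Route records with the same serialized key to the same Kafka partition.
		int hash = key != null ? Arrays.hashCode(key) : 0;
		return partitions[Math.abs(hash % partitions.length)];
	}
}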
 
Example #8
Source File: KafkaDataGeneratorJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
	if (args.length != 1) {
		throw new RuntimeException("Path to the properties file is expected as the only argument.");
	}
	ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStream<ItemTransaction> generatedInput =
			env.addSource(new ItemTransactionGeneratorSource(params))
					.name("Item Transaction Generator");

	FlinkKafkaProducer<ItemTransaction> kafkaSink = new FlinkKafkaProducer<>(
			params.getRequired(KafkaItemTransactionJob.TRANSACTION_INPUT_TOPIC_KEY),
			new TransactionSchema(),
			Utils.readKafkaProperties(params, false),
			Optional.empty());

	generatedInput.keyBy("itemId").addSink(kafkaSink).name("Transaction Kafka Sink");

	if (params.getBoolean(GENERATE_QUERIES, false)) {
		DataStream<Query> queries = env.addSource(new QueryGeneratorSource(params))
				.name("Query Generator");

		FlinkKafkaProducer<Query> querySink = new FlinkKafkaProducer<>(
				params.getRequired(KafkaItemTransactionJob.QUERY_INPUT_TOPIC_KEY),
				new QuerySchema(),
				Utils.readKafkaProperties(params, false),
				Optional.empty());

		queries.keyBy("itemId").addSink(querySink).name("Query Kafka Sink");
	}

	env.execute("Kafka Data generator");
}
 
Example #9
Source File: GenericKafkaSinkProviderTest.java    From flink-statefun with Apache License 2.0
@Test
public void exampleUsage() {
  JsonNode egressDefinition =
      loadAsJsonFromClassResource(getClass().getClassLoader(), "generic-kafka-egress.yaml");
  JsonEgressSpec<?> spec =
      new JsonEgressSpec<>(
          KafkaEgressTypes.GENERIC_KAFKA_EGRESS_TYPE,
          new EgressIdentifier<>("foo", "bar", Any.class),
          egressDefinition);

  GenericKafkaSinkProvider provider = new GenericKafkaSinkProvider();
  SinkFunction<?> sink = provider.forSpec(spec);

  assertThat(sink, instanceOf(FlinkKafkaProducer.class));
}
 
Example #10
Source File: KafkaStrSink.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 1
    env.setParallelism(1);

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");

    String topic = "test006";
    FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<>(topic,
            new ProducerStringSerializationSchema(topic),
            properties,
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE);


    // create a list of sample strings to send to Kafka
    List<String> list = new ArrayList<>();
    list.add("aaa");
    list.add("bbb");
    list.add("ccc");
    list.add("ddd");
    list.add("eee");
    list.add("fff");
    list.add("aaa");

    // write each string to the Kafka sink (sink parallelism is 4)
    env.fromCollection(list)
       .addSink(producer)
       .setParallelism(4);

    env.execute("sink demo : kafka str");
}
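
The ProducerStringSerializationSchema used above is defined elsewhere in the blog_demos project; a minimal sketch of such a KafkaSerializationSchema (an assumption, not necessarily the blog's exact code) could be:

import java.nio.charset.StandardCharsets;

import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

// Hypothetical sketch; the actual ProducerStringSerializationSchema in blog_demos may differ.
public class ProducerStringSerializationSchema implements KafkaSerializationSchema<String> {

    private final String topic;

    public ProducerStringSerializationSchema(String topic) {
        this.topic = topic;
    }

    @Override
    public ProducerRecord<byte[], byte[]> serialize(String element, Long timestamp) {
        // Write the string as UTF-8 bytes into the configured topic.
        return new ProducerRecord<>(topic, element.getBytes(StandardCharsets.UTF_8));
    }
}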
 
Example #11
Source File: KafkaObjSink.java    From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 1
    env.setParallelism(1);

    Properties properties = new Properties();
    // Kafka broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");

    String topic = "test006";
    FlinkKafkaProducer<Tuple2<String, Integer>> producer = new FlinkKafkaProducer<>(topic,
            new ObjSerializationSchema(topic),
            properties,
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE);

    // create a list of sample (word, 1) tuples
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2<>("aaa", 1));
    list.add(new Tuple2<>("bbb", 1));
    list.add(new Tuple2<>("ccc", 1));
    list.add(new Tuple2<>("ddd", 1));
    list.add(new Tuple2<>("eee", 1));
    list.add(new Tuple2<>("fff", 1));
    list.add(new Tuple2<>("aaa", 1));


    // count each word and write the running totals to the Kafka sink
    env.fromCollection(list)
        .keyBy(0)
        .sum(1)
        .addSink(producer)
        .setParallelism(4);


    env.execute("sink demo : kafka obj");
}
 
Example #12
Source File: KafkaDynamicSink.java    From flink with Apache License 2.0
@Override
protected SinkFunction<RowData> createKafkaProducer(
		String topic,
		Properties properties,
		SerializationSchema<RowData> serializationSchema,
		Optional<FlinkKafkaPartitioner<RowData>> partitioner) {
	return new FlinkKafkaProducer<>(
			topic,
			serializationSchema,
			properties,
			partitioner);
}
 
Example #13
Source File: KafkaEventsGeneratorJob.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		double errorRate = params.getDouble("error-rate", 0.0);
		int sleep = params.getInt("sleep", 1);

		String kafkaTopic = params.get("kafka-topic");
		String brokers = params.get("brokers", "localhost:9092");

		System.out.printf("Generating events to Kafka with standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleep);
		System.out.println();

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		env
			.addSource(new EventsGeneratorSource(errorRate, sleep))
			.addSink(new FlinkKafkaProducer<>(brokers, kafkaTopic, new EventDeSerializer()));

		// trigger program execution
		env.execute("State machine example Kafka events generator job");
	}
 
Example #14
Source File: KafkaDynamicTableFactoryTest.java    From flink with Apache License 2.0
@Override
protected Class<?> getExpectedProducerClass() {
	return FlinkKafkaProducer.class;
}
 
Example #15
Source File: KafkaShuffleTestBase.java    From flink with Apache License 2.0
@BeforeClass
public static void prepare() throws Exception {
	KafkaProducerTestBase.prepare();
	((KafkaTestEnvironmentImpl) kafkaServer).setProducerSemantic(FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
}
 
Example #16
Source File: KafkaSinkBuilder.java    From Alink with Apache License 2.0
@Override
public RichSinkFunction<Row> build() {
    SerializationSchema<Row> serializationSchema = getSerializationSchema();
    return new FlinkKafkaProducer<Row>(topic, serializationSchema, properties);
}
 
Example #17
Source File: ClickEventCount.java    From flink-playgrounds with Apache License 2.0
public static void main(String[] args) throws Exception {
	final ParameterTool params = ParameterTool.fromArgs(args);

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	configureEnvironment(params, env);

	boolean inflictBackpressure = params.has(BACKPRESSURE_OPTION);

	String inputTopic = params.get("input-topic", "input");
	String outputTopic = params.get("output-topic", "output");
	String brokers = params.get("bootstrap.servers", "localhost:9092");
	Properties kafkaProps = new Properties();
	kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
	kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

	DataStream<ClickEvent> clicks =
			env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
		.name("ClickEvent Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
			@Override
			public long extractTimestamp(final ClickEvent element) {
				return element.getTimestamp().getTime();
			}
		});

	if (inflictBackpressure) {
		// Force a network shuffle so that the backpressure will affect the buffer pools
		clicks = clicks
			.keyBy(ClickEvent::getPage)
			.map(new BackpressureMap())
			.name("Backpressure");
	}

	DataStream<ClickEventStatistics> statistics = clicks
		.keyBy(ClickEvent::getPage)
		.timeWindow(WINDOW_SIZE)
		.aggregate(new CountingAggregator(),
			new ClickEventStatisticsCollector())
		.name("ClickEvent Counter");

	statistics
		.addSink(new FlinkKafkaProducer<>(
			outputTopic,
			new ClickEventStatisticsSerializationSchema(outputTopic),
			kafkaProps,
			FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
		.name("ClickEventStatistics Sink");

	env.execute("Click Event Count");
}
 
Example #18
Source File: FlinkKafkaShuffle.java    From flink with Apache License 2.0
/**
 * The write side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>This function contains a {@link FlinkKafkaShuffleProducer} to shuffle and persist data in Kafka.
 * {@link FlinkKafkaShuffleProducer} uses the same key group assignment function
 * {@link KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes to.
 * Hence, each producer task can potentially write to each Kafka partition based on the key.
 * Here, the number of partitions equals the key group size.
 * In the case of using {@link TimeCharacteristic#EventTime}, each producer task broadcasts each watermark
 * to all of the Kafka partitions to make sure watermark information is propagated properly.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and allowed to be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#readKeyBy
 *
 * @param dataStream 			Data stream to be shuffled
 * @param topic 				Kafka topic written to
 * @param kafkaProperties 		Kafka properties for Kafka Producer
 * @param keySelector 			Key selector to retrieve key from `dataStream'
 * @param <T> 					Type of the input data stream
 * @param <K> 					Type of key
 */
public static <T, K> void writeKeyBy(
		DataStream<T> dataStream,
		String topic,
		Properties kafkaProperties,
		KeySelector<T, K> keySelector) {

	StreamExecutionEnvironment env = dataStream.getExecutionEnvironment();
	TypeSerializer<T> typeSerializer = dataStream.getType().createSerializer(env.getConfig());

	// write data to Kafka
	FlinkKafkaShuffleProducer<T, K> kafkaProducer = new FlinkKafkaShuffleProducer<>(
		topic,
		typeSerializer,
		kafkaProperties,
		env.clean(keySelector),
		FlinkKafkaProducer.Semantic.EXACTLY_ONCE,
		FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE);

	// make sure the sink parallelism is set to producerParallelism
	Preconditions.checkArgument(
		kafkaProperties.getProperty(PRODUCER_PARALLELISM) != null,
		"Missing producer parallelism for Kafka Shuffle");
	int producerParallelism = PropertiesUtil.getInt(kafkaProperties, PRODUCER_PARALLELISM, Integer.MIN_VALUE);

	addKafkaShuffle(dataStream, kafkaProducer, producerParallelism);
}
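
A hedged caller-side sketch for the property requirement described in the Javadoc above; the stream, topic, event type, and values are placeholders, and it assumes the PRODUCER_PARALLELISM and PARTITION_NUMBER constants are accessible to the caller:

Properties kafkaProperties = new Properties();
kafkaProperties.setProperty("bootstrap.servers", "localhost:9092");       // placeholder brokers
kafkaProperties.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "4"); // parallelism of the shuffle producer
kafkaProperties.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "8");     // number of Kafka partitions (key groups)

// 'events' is an existing DataStream<MyEvent>; MyEvent and getKey() are hypothetical.
FlinkKafkaShuffle.writeKeyBy(events, "shuffle-topic", kafkaProperties, MyEvent::getKey);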
 
Example #19
Source File: RandomKafkaDataGeneratorJob.java    From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = Utils.parseArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


    FlinkKafkaProducer<String> kafkaSink = new FlinkKafkaProducer<>(
            params.getRequired(K_KAFKA_TOPIC),
            new SimpleStringSchema(),
            Utils.readKafkaProperties(params));

    DataStream<String> input = env.addSource(new UUIDGeneratorSource()).name("Data Generator Source");

    input.addSink(kafkaSink).name("Kafka Sink").uid("Kafka Sink");

    input.print();

    env.execute("Data Generator Job");
}