org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer09 Java Examples

The following examples show how to use org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer09. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WriteToKafka.java (project: kafka-flink-101, Apache License 2.0; 5 votes)
/**
 * Wires a generated string source directly into a Kafka 0.9 producer sink
 * writing to the "flink-demo" topic, then runs the job.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if building or executing the job fails
 */
public static void main(String[] args) throws Exception {
  // Kafka client configuration: only the broker address is set.
  final Properties kafkaProps = new Properties();
  kafkaProps.setProperty("bootstrap.servers", "localhost:9092");

  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  // Source feeds the sink directly; no intermediate transformation.
  env.addSource(new SimpleStringGenerator())
      .addSink(new FlinkKafkaProducer09<>("flink-demo", new SimpleStringSchema(), kafkaProps));

  env.execute();
}
 
Example #2
Source File: TwitterIntoKafka.java (project: flink-streaming-etl, Apache License 2.0; 5 votes)
/**
 * Reads from a {@code TwitterSource}, passes the records through {@code ParseJson}
 * and writes the result to the Kafka topic "twitter".
 *
 * @param args {@code args[0]} is the path of the properties file that supplies the
 *             configuration handed to both the Twitter source and the Kafka producer
 * @throws Exception if the properties file cannot be loaded or the job fails
 */
public static void main(String[] args) throws Exception {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// One properties file configures both ends of the pipeline.
	final ParameterTool config = ParameterTool.fromPropertiesFile(args[0]);

	// Twitter source followed by the ParseJson flat-map.
	DataStream<String> parsedTweets = env
		.addSource(new TwitterSource(config.getProperties()))
		.flatMap(new ParseJson());

	// Side branch: throughput logging, pinned to a single parallel instance.
	parsedTweets.flatMap(new ThroughputLogger(5000L)).setParallelism(1);

	// Main branch: forward every parsed record to Kafka.
	FlinkKafkaProducer09<String> kafkaSink =
		new FlinkKafkaProducer09<>("twitter", new SimpleStringSchema(), config.getProperties());
	parsedTweets.addSink(kafkaSink);

	env.execute("Ingest data from Twitter to Kafka");
}
 
Example #3
Source File: StreamingETL.java (project: flink-streaming-etl, Apache License 2.0; 4 votes)
/**
 * Entry point of the "Streaming ETL" job.
 *
 * <p>Reads JSON records from the Kafka topic named by the required {@code topic}
 * parameter and fans them out into three outputs:
 * <ul>
 *   <li>records whose {@code user.lang} is "en" go to a rolling file-system sink;</li>
 *   <li>per-language counts over 10-second windows go to an Elasticsearch sink;</li>
 *   <li>the result of {@code TopNWords(10)} over word counts goes to the Kafka topic
 *       named by the required {@code wc-topic} parameter.</li>
 * </ul>
 *
 * @param args {@code args[0]} is the path of the properties file parsed by
 *             {@code ParameterTool}; no bounds check is done on {@code args}
 * @throws Exception if the properties file cannot be read or job execution fails
 */
public static void main(String[] args) throws Exception {
	// parse arguments
	ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

	// create streaming environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// enable event time processing
	env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	// enable fault-tolerance (argument is the checkpoint interval; milliseconds per the Flink API)
	env.enableCheckpointing(1000);

	// enable restarts (arguments: restart attempts, delay between attempts — see Flink RestartStrategies)
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

	// NOTE(review): the state backend location is a hard-coded, user-specific local path.
	env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

	// run each operator separately
	env.disableOperatorChaining();

	// get data from Kafka
	Properties kParams = params.getProperties();
	// A fresh random group.id is generated on every run.
	kParams.setProperty("group.id", UUID.randomUUID().toString());
	// Event timestamps come from the "timestamp_ms" field; the extractor is configured with a 1 minute bound.
	DataStream<ObjectNode> inputStream = env.addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
		.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
			@Override
			public long extractTimestamp(ObjectNode jsonNodes) {
				return jsonNodes.get("timestamp_ms").asLong();
			}
		}).name("Timestamp extractor");

	// filter out records without lang field (requires both "user" and "user.lang" to be present)
	DataStream<ObjectNode> tweetsWithLang = inputStream.filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang")).name("Filter records without 'lang' field");

	// select only lang = "en" tweets
	DataStream<ObjectNode> englishTweets = tweetsWithLang.filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en")).name("Select 'lang'=en tweets");

	// write to file system (target path comes from "sinkPath", falling back to a hard-coded user-specific default)
	RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
	rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
	englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

	// build aggregates (count per language) using window (10 seconds tumbling):
	// Note: this branch starts from tweetsWithLang (all languages), not from englishTweets.
	// NOTE(review): the operator name below contains the typo "Langauage"; it is a runtime string and is left unchanged here.
	DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang.keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
		.timeWindow(Time.seconds(10))
		.apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter()).name("Count per Langauage (10 seconds tumbling)");

	// write window aggregate to ElasticSearch
	// The transport node is hard-coded to localhost:9300; the remaining sink configuration is the full parameter map.
	List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
	ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());

	languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

	// word-count on the tweet stream
	// Note: this branch also starts from tweetsWithLang, so it covers every language.
	// NOTE(review): tweet.get("text") is dereferenced without a presence check — confirm every record carries "text".
	DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
		// get text from tweets
		.map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
		// split text into (word, 1) tuples
		.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
			@Override
			public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
				// Splits on a single space only: consecutive spaces produce empty-string tokens,
				// which are emitted like any other word.
				String[] splits = s.split(" ");
				for (String sp : splits) {
					collector.collect(new Tuple2<>(sp, 1L));
				}
			}
		}).name("Tokenize words")
		// group by word
		.keyBy(0)
		// build 1 min windows, compute every 10 seconds --> count word frequency
		.timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
		// build top n every 10 seconds
		.timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

	// write top Ns to Kafka topic
	topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");

	// Nothing runs until execute() is called; the calls above only describe the job.
	env.execute("Streaming ETL");

}
 
Example #4
Source File: Kafka09AvroTableSink.java (project: df_data_service, Apache License 2.0; 4 votes)
/**
 * Creates the Kafka 0.9 producer used by this table sink.
 *
 * @param topic               Kafka topic the producer writes to
 * @param properties          Kafka client properties passed through to the producer
 * @param serializationSchema schema used to serialize each {@code Tuple2<Boolean, Row>} record
 * @param partitioner         partitioner passed through to the producer
 * @return a new {@code FlinkKafkaProducer09} built from the given arguments
 */
@Override
protected FlinkKafkaProducerBase<Tuple2<Boolean, Row>> createKafkaProducer(
		String topic,
		Properties properties,
		SerializationSchema<Tuple2<Boolean, Row>> serializationSchema,
		FlinkKafkaPartitioner<Tuple2<Boolean, Row>> partitioner) {
	// Note the constructor's argument order: schema comes before properties.
	final FlinkKafkaProducer09<Tuple2<Boolean, Row>> kafka09Producer =
		new FlinkKafkaProducer09<>(topic, serializationSchema, properties, partitioner);
	return kafka09Producer;
}
 
Example #5
Source File: UnitTestSuiteFlink.java (project: df_data_service, Apache License 2.0; 4 votes)
/**
 * Manual smoke test of the Flink SQL path against a remote Flink cluster.
 *
 * <p>Steps:
 * <ol>
 *   <li>Read raw strings from the Kafka topic "finance".</li>
 *   <li>Strip backslashes and the quotes wrapping embedded JSON objects, then write the
 *       cleaned strings to the staging topic "df_trans_stage_finance".</li>
 *   <li>Register the staging topic as table "Orders" with a single String column "name".</li>
 *   <li>Run {@code SELECT name FROM Orders} and write the result to a '|'-delimited CSV file.</li>
 * </ol>
 *
 * <p>Hosts, ports, topics and the result file path are hard-coded. Any failure is logged
 * through {@code LOG} and not rethrown, so this method never throws to its caller.
 */
public static void testFlinkSQL() {

    LOG.info("Only Unit Testing Function is enabled");

    final String resultFile = "/home/vagrant/test.txt";
    // Single definition of the broker address, shared by consumer properties and producer.
    final String brokerList = "localhost:9092";
    final String kafkaTopic = "finance";
    final String kafkaTopicStage = "df_trans_stage_finance";

    try {
        // The jar containing DFInitService is shipped to the remote cluster.
        String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
                .setParallelism(1);
        StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", brokerList);
        properties.setProperty("group.id", "consumer3");

        // Internal convert JSON string to JSON - begin
        DataStream<String> stream = env
                .addSource(new FlinkKafkaConsumer09<>(kafkaTopic, new SimpleStringSchema(), properties));

        stream.map(new MapFunction<String, String>() {
            @Override
            public String map(String jsonString) throws Exception {
                // Literal replacements only: drop every backslash, then unwrap "{...}" into {...}.
                return jsonString.replace("\\", "").replace("\"{", "{").replace("}\"", "}");
            }
        }).addSink(new FlinkKafkaProducer09<String>(brokerList, kafkaTopicStage, new SimpleStringSchema()));
        // Internal convert JSON string to JSON - end

        String[] fieldNames = new String[] {"name"};
        Class<?>[] fieldTypes = new Class<?>[] {String.class};

        Kafka09AvroTableSource kafkaTableSource = new Kafka09AvroTableSource(
                kafkaTopicStage,
                properties,
                fieldNames,
                fieldTypes);

        tableEnv.registerTableSource("Orders", kafkaTableSource);

        Table result = tableEnv.sql("SELECT name FROM Orders");

        // Start from a clean result file so earlier runs do not pollute the output.
        Files.deleteIfExists(Paths.get(resultFile));

        // create a TableSink and write the result Table to it
        TableSink sink = new CsvTableSink(resultFile, "|");
        result.writeToSink(sink);

        env.execute("FlinkConsumer");

    } catch (Exception e) {
        // Best-effort test helper: report through the class logger (with stack trace)
        // instead of dumping to stderr, and do not propagate.
        LOG.error("Flink SQL unit test failed", e);
    }
}