package com.packt.streaming.receiver; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.streaming.Duration; import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaPairDStream; import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; import org.apache.spark.streaming.kafka.KafkaUtils; import scala.Tuple2; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.regex.Pattern; public class KafkaReceiverWordCountJava { private static final Pattern WORD_DELIMETER = Pattern.compile(" "); public static void main(String[] args) throws Exception { String zkQuorum = "localhost:2181"; String groupName = "stream"; int numThreads = 3; String topicsName = "test1"; SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream"); JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000)); Map<String, Integer> topicToBeUsedBySpark = new HashMap<>(); String[] topics = topicsName.split(","); for (String topic : topics) { topicToBeUsedBySpark.put(topic, numThreads); } JavaPairReceiverInputDStream<String, String> streamMessages = KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark); JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(WORD_DELIMETER.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); javaStreamingContext.start(); javaStreamingContext.awaitTermination(); } }