package v1; import lombok.extern.slf4j.Slf4j; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.streaming.Durations; import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaPairDStream; import org.apache.spark.streaming.api.java.JavaPairInputDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; import org.apache.spark.streaming.kafka.KafkaUtils; import scala.Tuple2; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.regex.Pattern; /** * Created by 張燿峰 * sparkStreaming消费kafka消息,实现wordCount * * @author 孤 * @date 2019/4/28 * @Varsion 1.0 */ @Slf4j public class KafkaStreaming { /** * 数据分割的规则 */ private static final Pattern SPACE = Pattern.compile(" "); /** * zookeeper主机 */ private static final String HOST = "192.168.253.132:2181"; /** * 分组ID */ private static final String GROP = "test-consumer-group"; /** * 主题ID */ private static final String TOPIC = "JavaKafka"; /** * 分片 */ private static final Integer THREAD = 1; public static void main(String[] args) { SparkConf sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]"); JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10000)); //设置检查点 streamingContext.checkpoint("HDFS URL"); Map<String, Integer> topicThread = new HashMap<>(1); topicThread.put(TOPIC, THREAD); JavaPairInputDStream<String, String> dStream = KafkaUtils.createStream(streamingContext, HOST, GROP, topicThread); JavaDStream<String> words = dStream.flatMap((FlatMapFunction<Tuple2<String, String>, String>) stringStringTuple2 -> Arrays.asList(SPACE.split(stringStringTuple2._2)).iterator()); //统计 JavaPairDStream<String, Integer> result = words.mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1)).reduceByKey((Function2<Integer, Integer, Integer>) (v1, v2) -> v1 + v2); try { result.print(); streamingContext.start(); streamingContext.awaitTermination(); } catch (InterruptedException e) { e.printStackTrace(); } } }