Java Code Examples for org.apache.spark.streaming.api.java.JavaPairDStream#print()
The following examples show how to use
org.apache.spark.streaming.api.java.JavaPairDStream#print() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KafkaStreaming.java From sparkResearch with Apache License 2.0 | 8 votes |
public static void main(String[] args) { SparkConf sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]"); JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10000)); //设置检查点 streamingContext.checkpoint("HDFS URL"); Map<String, Integer> topicThread = new HashMap<>(1); topicThread.put(TOPIC, THREAD); JavaPairInputDStream<String, String> dStream = KafkaUtils.createStream(streamingContext, HOST, GROP, topicThread); JavaDStream<String> words = dStream.flatMap((FlatMapFunction<Tuple2<String, String>, String>) stringStringTuple2 -> Arrays.asList(SPACE.split(stringStringTuple2._2)).iterator()); //统计 JavaPairDStream<String, Integer> result = words.mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1)).reduceByKey((Function2<Integer, Integer, Integer>) (v1, v2) -> v1 + v2); try { result.print(); streamingContext.start(); streamingContext.awaitTermination(); } catch (InterruptedException e) { e.printStackTrace(); } }
Example 2
Source File: WordCountTransformOpEx.java From Apache-Spark-2x-for-Java-Developers with MIT License | 5 votes |
public static void main(String[] args) throws Exception { System.setProperty("hadoop.home.dir", "E:\\hadoop"); SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]"); JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1)); Logger rootLogger = LogManager.getRootLogger(); rootLogger.setLevel(Level.WARN); List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10)); JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() ); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 ); wordCounts.print(); JavaPairDStream<String, Integer> joinedDstream = wordCounts .transformToPair(new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>>() { @Override public JavaPairRDD<String, Integer> call(JavaPairRDD<String, Integer> rdd) throws Exception { JavaPairRDD<String, Integer> modRDD = rdd.join(initialRDD).mapToPair( new PairFunction<Tuple2<String, Tuple2<Integer, Integer>>, String, Integer>() { @Override public Tuple2<String, Integer> call( Tuple2<String, Tuple2<Integer, Integer>> joinedTuple) throws Exception { return new Tuple2<>(joinedTuple._1(),(joinedTuple._2()._1() + joinedTuple._2()._2())); } }); return modRDD; } }); joinedDstream.print(); streamingContext.start(); streamingContext.awaitTermination(); }
Example 3
Source File: FileStreamingEx.java From Apache-Spark-2x-for-Java-Developers with MIT License | 5 votes |
public static void main(String[] args) { //Window Specific property if Hadoop is not instaalled or HADOOP_HOME is not set System.setProperty("hadoop.home.dir", "E:\\hadoop"); //Logger rootLogger = LogManager.getRootLogger(); //rootLogger.setLevel(Level.WARN); SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]"); String inputDirectory="E:\\hadoop\\streamFolder\\"; JavaSparkContext sc = new JavaSparkContext(conf); JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1)); // streamingContext.checkpoint("E:\\hadoop\\checkpoint"); Logger rootLogger = LogManager.getRootLogger(); rootLogger.setLevel(Level.WARN); JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory); streamfile.print(); streamfile.foreachRDD(rdd-> rdd.foreach(x -> System.out.println(x))); JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class); streamedFile.print(); streamingContext.start(); try { streamingContext.awaitTermination(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
Example 4
Source File: WordCountSocketJava8Ex.java From Apache-Spark-2x-for-Java-Developers with MIT License | 5 votes |
public static void main(String[] args) throws Exception { System.setProperty("hadoop.home.dir", "E:\\hadoop"); SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]"); JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1)); List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10)); JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() ); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 ); wordCounts.print(); JavaPairDStream<String, Integer> joinedDstream = wordCounts.transformToPair( new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>>() { @Override public JavaPairRDD<String, Integer> call(JavaPairRDD<String, Integer> rdd) throws Exception { rdd.join(initialRDD).mapToPair(new PairFunction<Tuple2<String,Tuple2<Integer,Integer>>, String, Integer>() { @Override public Tuple2<String, Integer> call(Tuple2<String, Tuple2<Integer, Integer>> joinedTuple) throws Exception { // TODO Auto-generated method stub return new Tuple2<>( joinedTuple._1(), (joinedTuple._2()._1()+joinedTuple._2()._2()) ); } }); return rdd; } }); joinedDstream.print(); streamingContext.start(); streamingContext.awaitTermination(); }
Example 5
Source File: JavaCustomReceiver.java From SparkDemo with MIT License | 5 votes |
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: JavaCustomReceiver <hostname> <port>"); System.exit(1); } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000)); // Create an input stream with the custom receiver on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') JavaReceiverInputDStream<String> lines = ssc.receiverStream( new JavaCustomReceiver(args[0], Integer.parseInt(args[1]))); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); ssc.awaitTermination(); }
Example 6
Source File: WindowBatchInterval.java From Apache-Spark-2x-for-Java-Developers with MIT License | 4 votes |
public static void main(String[] args) { //Window Specific property if Hadoop is not instaalled or HADOOP_HOME is not set System.setProperty("hadoop.home.dir", "E:\\hadoop"); //Logger rootLogger = LogManager.getRootLogger(); //rootLogger.setLevel(Level.WARN); SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]"); JavaSparkContext sc = new JavaSparkContext(conf); JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.minutes(2)); streamingContext.checkpoint("E:\\hadoop\\checkpoint"); Logger rootLogger = LogManager.getRootLogger(); rootLogger.setLevel(Level.WARN); List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10)); JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() ); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 ); wordCounts.print(); wordCounts.window(Durations.minutes(8)).countByValue() .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2()))); wordCounts.window(Durations.minutes(8),Durations.minutes(2)).countByValue() .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2()))); wordCounts.window(Durations.minutes(12),Durations.minutes(8)).countByValue() .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2()))); wordCounts.window(Durations.minutes(2),Durations.minutes(2)).countByValue() .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2()))); wordCounts.window(Durations.minutes(12),Durations.minutes(12)).countByValue() .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2()))); //comment these two operation to make it run wordCounts.window(Durations.minutes(5),Durations.minutes(2)).countByValue() .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2()))); wordCounts.window(Durations.minutes(10),Durations.minutes(1)).countByValue() .foreachRDD(tRDD -> tRDD.foreach(x->System.out.println(new Date()+" ::The window count tag is ::"+x._1() +" and the val is ::"+x._2()))); streamingContext.start(); try { streamingContext.awaitTermination(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
Example 7
Source File: JavaNetworkWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: JavaNetworkWordCount <hostname> <port>"); System.exit(1); } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); // Create a JavaReceiverInputDStream on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream<String> lines = ssc.socketTextStream( args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); ssc.awaitTermination(); }
Example 8
Source File: JavaKafkaWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) throws Exception { if (args.length < 4) { System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>"); System.exit(1); } StreamingExamples.setStreamingLogLevels(); SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount"); // Create the context with 2 seconds batch size JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000)); int numThreads = Integer.parseInt(args[3]); Map<String, Integer> topicMap = new HashMap<>(); String[] topics = args[2].split(","); for (String topic: topics) { topicMap.put(topic, numThreads); } JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, args[0], args[1], topicMap); JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); jssc.start(); jssc.awaitTermination(); }
Example 9
Source File: JavaKafkaReceiverWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { StreamingExamples.setStreamingLogLevels(); SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaReceiverWordCount").setMaster("local[4]"); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(6)); Map<String, Integer> topicMap = new HashMap<String, Integer>(); // key是topic名称,value是线程数量 topicMap.put("2017-7-26", 1); String zookeeperList = "master:2181,slave1:2181,slave2:2181"; JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, zookeeperList, "JavaKafkaReceiverWordCount", topicMap); JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Lists.newArrayList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); jssc.start(); try { jssc.awaitTermination(); } catch (Exception e) { e.printStackTrace(); } }
Example 10
Source File: JavaNetworkWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { /** * 资源.setMaster("local[2]")必须大于1 一个负责取数据 其他负责计算 */ // if (args.length < 2) { // System.err.println("Usage: JavaNetworkWordCount <hostname> <port>"); // System.exit(1); // } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = SparkUtils.getLocalSparkConf(JavaNetworkWordCount.class); /* * 创建该对象类似于spark core中的JavaSparkContext * 该对象除了接受SparkConf对象,还接收了一个BatchInterval参数,就算说,每收集多长时间去划分一个人Batch即RDD去执行 */ JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(2)); /* * 首先创建输入DStream,代表一个数据比如这里从socket或KafKa来持续不断的进入实时数据流 * 创建一个监听Socket数据量,RDD里面的每一个元素就是一行行的文本 */ JavaReceiverInputDStream<String> lines = ssc.socketTextStream("192.168.2.1", 9999, StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Lists.newArrayList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); try { ssc.awaitTermination(); } catch (Exception e) { e.printStackTrace(); } }
Example 11
Source File: JavaHDFSWordCount.java From SparkDemo with MIT License | 4 votes |
/** * To run this on your local machine, you need to first run a Netcat server * `$ nc -lk 9999` and then run the example `$ bin/run-example * org.apache.spark.examples.streaming.JavaNetworkWordCount localhost 9999` */ public static void main(String[] args) { SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount").setMaster("local[5]"); /* * 创建该对象类似于spark core中的JavaSparkContext * 该对象除了接受SparkConf对象,还接收了一个BatchInterval参数,就算说, * 没收集多长时间去划分一个人Batch即RDD去执行 */ JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(5)); /* * 首先创建输入DStream,代表一个数据比如这里从socket或KafKa来持续不断的进入实时数据流 * 创建一个监听Socket数据量,RDD里面的每一个元素就是一行行的文本 */ JavaDStream<String> lines = ssc.textFileStream("hdfs://master:8020/wordcount_dir"); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Lists.newArrayList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); try { ssc.awaitTermination(); } catch (Exception e) { e.printStackTrace(); } }
Example 12
Source File: JavaKafkaDirectWordCount.java From SparkDemo with MIT License | 4 votes |
/** * 1.一对一 * 2.高效 * 3.准确的只计算一次 * * @param args */ public static void main(String[] args) { StreamingExamples.setStreamingLogLevels(); SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaDirectWordCount").setMaster("local[1]"); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(6)); Map<String, String> kafkaParams = new HashMap<String, String>(); // key是topic名称,value是线程数量 kafkaParams.put("metadata.broker.list", "master:9092,slave1:9092,slave2:9092"); // 指定broker在哪 HashSet<String> topicsSet = new HashSet<String>(); topicsSet.add("2017-7-26"); // 指定操作的topic // Create direct kafka stream with brokers and topics createDirectStream() JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream( jssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet ); JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Lists.newArrayList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); jssc.start(); try { jssc.awaitTermination(); } catch (Exception e) { e.printStackTrace(); } }
Example 13
Source File: KafkaReceiverWordCountJava.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License | 4 votes |
public static void main(String[] args) throws Exception { String zkQuorum = "localhost:2181"; String groupName = "stream"; int numThreads = 3; String topicsName = "test1"; SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream"); JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000)); Map<String, Integer> topicToBeUsedBySpark = new HashMap<>(); String[] topics = topicsName.split(","); for (String topic : topics) { topicToBeUsedBySpark.put(topic, numThreads); } JavaPairReceiverInputDStream<String, String> streamMessages = KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark); JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(WORD_DELIMETER.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); javaStreamingContext.start(); javaStreamingContext.awaitTermination(); }
Example 14
Source File: SparkStreamingPulsarReceiverExample.java From pulsar with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { if (args.length < 3) { System.err.println("Missing parameters!"); System.err.println("Usage: <pulsar-service-url> <topic> <sub>"); return; } String serviceUrl = args[0]; String inputTopic = args[1]; String subscription = args[2]; System.out.println("Parameters:"); System.out.println("\tServiceUrl:\t" + serviceUrl); System.out.println("\tTopic:\t" + inputTopic); System.out.println("\tSubscription:\t" + subscription); SparkConf sparkConf = new SparkConf().setAppName("Pulsar Spark Example"); JavaStreamingContext jsc = new JavaStreamingContext(sparkConf, Durations.seconds(60)); ConsumerConfigurationData<byte[]> pulsarConf = new ConsumerConfigurationData(); Set<String> set = new HashSet<>(); set.add(inputTopic); pulsarConf.setTopicNames(set); pulsarConf.setSubscriptionName(subscription); SparkStreamingPulsarReceiver pulsarReceiver = new SparkStreamingPulsarReceiver( serviceUrl, pulsarConf, new AuthenticationDisabled()); JavaReceiverInputDStream<byte[]> lineDStream = jsc.receiverStream(pulsarReceiver); JavaPairDStream<String, Integer> result = lineDStream.flatMap(x -> { String line = new String(x, StandardCharsets.UTF_8); List<String> list = Arrays.asList(line.split(" ")); return list.iterator(); }) .mapToPair(x -> new Tuple2<String, Integer>(x, 1)) .reduceByKey((x, y) -> x + y); result.print(); jsc.start(); jsc.awaitTermination(); }
Example 15
Source File: StateLessProcessingExample.java From Apache-Spark-2x-for-Java-Developers with MIT License | 3 votes |
public static void main(String[] args) throws InterruptedException { System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils"); SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("stateless Streaming Example") .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate(); JavaStreamingContext jssc = new JavaStreamingContext(new JavaSparkContext(sparkSession.sparkContext()), Durations.milliseconds(1000)); JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999); JavaDStream<FlightDetails> flightDetailsStream = inStream.map(x -> { ObjectMapper mapper = new ObjectMapper(); return mapper.readValue(x, FlightDetails.class); }); //flightDetailsStream.print(); //flightDetailsStream.foreachRDD((VoidFunction<JavaRDD<FlightDetails>>) rdd -> rdd.saveAsTextFile("hdfs://namenode:port/path")); JavaDStream<FlightDetails> window = flightDetailsStream.window(Durations.minutes(5),Durations.minutes(1)); JavaPairDStream<String, Double> transfomedWindow = window.mapToPair(f->new Tuple2<String,Double>(f.getFlightId(),f.getTemperature())). mapValues(t->new Tuple2<Double,Integer>(t,1)) .reduceByKey((t1, t2) -> new Tuple2<Double, Integer>(t1._1()+t2._1(), t1._2()+t2._2())).mapValues(t -> t._1()/t._2()); transfomedWindow.cache(); transfomedWindow.print(); jssc.start(); jssc.awaitTermination(); }