org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream Java Examples

The following examples show how to use org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream, the key/value DStream returned by receiver-based input sources such as KafkaUtils.createStream. The originating project and source file are noted above each example.
Example #1
Source File: JavaKafkaWordCount.java    From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  if (args.length < 4) {
    System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();
  SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
  // Create the streaming context with a 2-second batch interval
  JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

  int numThreads = Integer.parseInt(args[3]);
  Map<String, Integer> topicMap = new HashMap<>();
  String[] topics = args[2].split(",");
  for (String topic: topics) {
    topicMap.put(topic, numThreads);
  }

  // Receiver-based Kafka stream: each element is a (message key, message body) pair
  JavaPairReceiverInputDStream<String, String> messages =
          KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

  // Keep only the message body; the Kafka key is not needed for counting
  JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
    @Override
    public String call(Tuple2<String, String> tuple2) {
      return tuple2._2();
    }
  });

  // Split each line into words; SPACE is a precompiled Pattern defined elsewhere in the class
  JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
    @Override
    public Iterator<String> call(String x) {
      return Arrays.asList(SPACE.split(x)).iterator();
    }
  });

  // Count words: map each word to (word, 1), then sum the counts per key
  JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
    new PairFunction<String, String, Integer>() {
      @Override
      public Tuple2<String, Integer> call(String s) {
        return new Tuple2<>(s, 1);
      }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer i1, Integer i2) {
        return i1 + i2;
      }
    });

  wordCounts.print();
  jssc.start();
  jssc.awaitTermination();
}
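
With Java 8+, the same pipeline can be written far more compactly, since Spark's Java function interfaces accept lambdas. A minimal sketch of the transformation chain above (same jssc, topicMap, and SPACE as in the example; assumes Spark 2.x, where FlatMapFunction returns an Iterator):

  JavaPairReceiverInputDStream<String, String> messages =
          KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

  JavaDStream<String> lines = messages.map(Tuple2::_2); // keep the message body
  JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
  JavaPairDStream<String, Integer> wordCounts = words
          .mapToPair(s -> new Tuple2<>(s, 1))
          .reduceByKey((i1, i2) -> i1 + i2);

  wordCounts.print();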
 
Example #2
Source File: JavaKafkaReceiverWordCount.java    From SparkDemo with MIT License
public static void main(String[] args) {
	StreamingExamples.setStreamingLogLevels();
	SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaReceiverWordCount").setMaster("local[4]");
	JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(6));

	Map<String, Integer> topicMap = new HashMap<String, Integer>(); // key is the topic name, value is the number of receiver threads
	topicMap.put("2017-7-26", 1);

	String zookeeperList = "master:2181,slave1:2181,slave2:2181";

	// Receiver-based Kafka stream connecting through the ZooKeeper quorum above
	JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, zookeeperList,
			"JavaKafkaReceiverWordCount", topicMap);

	JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
		@Override
		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	// Split lines into words; Lists.newArrayList comes from Guava, SPACE is a Pattern defined elsewhere in the class
	JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public Iterator<String> call(String x) {
			return Lists.newArrayList(SPACE.split(x)).iterator();
		}
	});

	JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() {
		@Override
		public Tuple2<String, Integer> call(String s) {
			return new Tuple2<String, Integer>(s, 1);
		}
	}).reduceByKey(new Function2<Integer, Integer, Integer>() {
		@Override
		public Integer call(Integer i1, Integer i2) {
			return i1 + i2;
		}
	});

	wordCounts.print();
	jssc.start();

	try {
		jssc.awaitTermination();
	} catch (Exception e) {
		e.printStackTrace();
	}
}
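
A receiver-based stream like this one buffers incoming messages in executor memory before each batch is processed, so data can be lost if an executor dies. If delivery guarantees matter, Spark's receiver write-ahead log can be enabled alongside checkpointing; a minimal sketch of the setup (the checkpoint path is an assumption):

	SparkConf sparkConf = new SparkConf()
			.setAppName("JavaKafkaReceiverWordCount")
			.setMaster("local[4]")
			// persist received blocks to stable storage before they are acknowledged
			.set("spark.streaming.receiver.writeAheadLog.enable", "true");

	JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(6));
	jssc.checkpoint("hdfs://master:9000/spark/checkpoints"); // hypothetical checkpoint directory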
 
Example #3
Source File: KafkaReceiverWordCountJava.java    From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License
public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";
    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");

    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000)); // 5-second batch interval

    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    String[] topics = topicsName.split(",");
    for (String topic : topics) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    // Split lines into words; WORD_DELIMETER is a precompiled Pattern defined elsewhere in the class
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
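
The receiver-based createStream used above is the older of Spark's two Kafka integrations. With the spark-streaming-kafka-0-8 artifact, the same topics can also be consumed without a receiver via KafkaUtils.createDirectStream, which tracks offsets against Kafka directly instead of going through ZooKeeper. A minimal sketch (the broker address is an assumption; topicsName and javaStreamingContext are reused from the example):

    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", "localhost:9092"); // hypothetical broker list

    Set<String> topicSet = new HashSet<>(Arrays.asList(topicsName.split(",")));

    // Direct (receiver-less) stream: one RDD partition per Kafka partition, no numThreads needed
    JavaPairInputDStream<String, String> directMessages = KafkaUtils.createDirectStream(
            javaStreamingContext,
            String.class, String.class,
            kafka.serializer.StringDecoder.class, kafka.serializer.StringDecoder.class,
            kafkaParams, topicSet);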
 
Example #4
Source File: KafkaStreamRestHandler.java    From elasticsearch-rest-command with The Unlicense
@Override
protected void handleRequest(RestRequest request, RestChannel channel, Client client)
		throws Exception {
	final String topic = request.param("topic", "");
	final boolean schema = request.paramAsBoolean("schema", false);
	final String master = request.param("masterAddress", "local");
	final String hdfs = request.param("hdfs", "hdfs://localhost:50070"); // note: 50070 is usually the NameNode web UI port; HDFS itself typically listens on 8020 or 9000
	final String memory =  request.param("memory", "2g");
	final String appName = request.param("appName", "appName-"+topic);
	final int duration = request.paramAsInt("duration", 1000);
	
	Thread exec = new Thread(new Runnable(){

		@Override
		public void run() {
		
			SparkConf sparkConf = new SparkConf().setAppName(appName).setMaster(master).set("spark.executor.memory", memory);
			JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(duration));
			
			Map<String, Integer> topicMap = new HashMap<String, Integer>();
			topicMap.put(topic, 3);
			
			// String keys need kafka.serializer.StringDecoder; DefaultDecoder yields byte[] and does not match a String key type.
			// kafkaParams is null here; a real deployment normally supplies at least "zookeeper.connect" and "group.id".
			JavaPairReceiverInputDStream<String, byte[]> kafkaStream = KafkaUtils.createStream(jssc, String.class, byte[].class,
						kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, null,
						topicMap, StorageLevel.MEMORY_ONLY());
	
			//JobConf confHadoop = new JobConf();
			//confHadoop.set("mapred.output.compress", "true");
			//confHadoop.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec");
	
			kafkaStream.saveAsHadoopFiles(hdfs, "seq", Text.class, BytesWritable.class, KafkaStreamSeqOutputFormat.class);
			
			topicContextMap.put(topic, jssc);
			jssc.start();		
			jssc.awaitTermination();
			
		}
	});
	
	exec.start();
	
	channel.sendResponse(new BytesRestResponse(RestStatus.OK, String.format("{\"topic\":\"%s\"}", topic)));
}
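
One caveat about the save step above: saveAsHadoopFiles is called with Text.class and BytesWritable.class, but the stream's elements are plain String/byte[] pairs, which SequenceFile-style output formats generally reject at runtime. A defensive variant converts the pairs to Writables first; a minimal sketch, assuming Java 8 lambdas and that the project's KafkaStreamSeqOutputFormat extends a Hadoop OutputFormat<Text, BytesWritable>:

			// Convert (String, byte[]) pairs to Hadoop Writables before writing
			JavaPairDStream<Text, BytesWritable> writables = kafkaStream.mapToPair(
					t -> new Tuple2<Text, BytesWritable>(new Text(t._1()), new BytesWritable(t._2())));

			// prefix = HDFS path, suffix = "seq"; one output directory per batch interval
			writables.saveAsHadoopFiles(hdfs, "seq", Text.class, BytesWritable.class, KafkaStreamSeqOutputFormat.class);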