Java Code Examples for kafka.serializer.StringDecoder

The following examples show how to use kafka.serializer.StringDecoder. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: incubator-iotdb   Source File: KafkaConsumer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the message streams for {@code Constant.TOPIC} and hands each one to its own
 * {@code KafkaConsumerThread}, executed on a fixed-size thread pool.
 */
private void consume() {
  // How many streams (one per consumer thread) to open for the topic.
  final Map<String, Integer> threadsPerTopic = new HashMap<>();
  threadsPerTopic.put(Constant.TOPIC, Constant.CONSUMER_THREAD_NUM);

  // Both keys and values are decoded as strings.
  final StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
  final StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());

  final List<KafkaStream<String, String>> topicStreams = consumer
      .createMessageStreams(threadsPerTopic, keyDecoder, valueDecoder)
      .get(Constant.TOPIC);

  final ExecutorService pool = Executors.newFixedThreadPool(Constant.CONSUMER_THREAD_NUM);
  for (KafkaStream<String, String> topicStream : topicStreams) {
    pool.submit(new KafkaConsumerThread(topicStream));
  }
}
 
Example 2
Source Project: light_drtc   Source File: KafkaMqCollect.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Consumes the single stream opened for {@code Constants.kfTopic} and forwards each
 * record's key and message to {@code mqTimer.parseMqText}.
 *
 * <p>The loop ends only when the stream iterator reports that no further message is available.
 */
public void collectMq() {
    Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
    // Autoboxing replaces the deprecated Integer(int) constructor.
    topicCountMap.put(Constants.kfTopic, 1);

    StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
    StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());

    Map<String, List<KafkaStream<String, String>>> consumerMap =
            consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);

    // Exactly one stream was requested above, so it is the first entry for the topic.
    KafkaStream<String, String> stream = consumerMap.get(Constants.kfTopic).get(0);
    ConsumerIterator<String, String> it = stream.iterator();
    while (it.hasNext()) {
        MessageAndMetadata<String, String> msgMeta = it.next();
        super.mqTimer.parseMqText(msgMeta.key(), msgMeta.message());
    }
}
 
Example 3
Source Project: kafka-spark-avro-example   Source File: SparkStreaming.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a direct Kafka stream over the "test" topic and prints every record's key together
 * with its value cast to {@code Model}.
 *
 * @param ssc streaming context the direct stream is created on
 * @param sc  Spark context; not used by this method
 */
private static void processStream(JavaStreamingContext ssc, JavaSparkContext sc) {
  System.out.println("--> Processing stream");

  final Map<String, String> kafkaParams = new HashMap<>();
  // Connection and consumer-group settings.
  kafkaParams.put("bootstrap.servers", "localhost:9092");
  kafkaParams.put("group.id", "spark");
  // Avro / schema-registry settings.
  kafkaParams.put("schema.registry.url", "http://localhost:8081");
  kafkaParams.put("specific.avro.reader", "true");
  // Deserializer class names.
  kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
  kafkaParams.put("value.deserializer", "io.confluent.kafka.serializers.KafkaAvroDeserializer");

  final Set<String> topics = new HashSet<>(Collections.singletonList("test"));

  final JavaPairInputDStream<String, Object> records = KafkaUtils.createDirectStream(
      ssc,
      String.class,
      Object.class,
      StringDecoder.class,
      KafkaAvroDecoder.class,
      kafkaParams,
      topics);

  records.foreachRDD(batch -> {
    batch.foreachPartition(partition -> {
      while (partition.hasNext()) {
        final Tuple2<String, Object> record = partition.next();
        final Model model = (Model) record._2();
        System.out.println(record._1() + " --> " + model);
      }
    });
  });
}
 
Example 4
/**
 * Opens one string-decoded stream for {@code TOPIC} and starts a thread named
 * "kafkaMessageReceiverThread" that prints every received message and appends it to
 * {@code messagesReceived}; a {@code null} message is recorded as {@code "<null>"}.
 */
private void consumeMessages() {
    // The same decoder instance serves both keys and values.
    final StringDecoder stringDecoder = new StringDecoder(new VerifiableProperties());

    final Map<String, Integer> streamsPerTopic = new HashMap<String, Integer>();
    streamsPerTopic.put(TOPIC, 1);

    final ConsumerIterator<String, String> messages = consumer
            .createMessageStreams(streamsPerTopic, stringDecoder, stringDecoder)
            .get(TOPIC)
            .get(0)
            .iterator();

    final Runnable receiveLoop = () -> {
        while (messages.hasNext()) {
            final String payload = messages.next().message();
            final String msg = (payload != null) ? payload : "<null>";
            System.out.println("got message: " + msg);
            messagesReceived.add(msg);
        }
    };
    new Thread(receiveLoop, "kafkaMessageReceiverThread").start();
}
 
Example 5
Source Project: metrics-kafka   Source File: KafkaGraphiteConsumer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a Kafka consumer for the given topic filter and a Graphite client.
 *
 * @param graphiteHost          host passed to the {@code GraphiteClient}
 * @param graphitePort          port passed to the {@code GraphiteClient}
 * @param zkConnect             value for the {@code zookeeper.connect} consumer property
 * @param topic                 topic expression used to build the {@code Whitelist} filter
 * @param groupId               value for the {@code group.id} consumer property
 * @param zkSessionTimeoutMs    value for the {@code zookeeper.session.timeout.ms} consumer property
 * @param readFromStartOfStream when {@code true}, {@code auto.offset.reset} is set to
 *                              {@code "smallest"}; otherwise to {@code "largest"}
 */
public KafkaGraphiteConsumer(String graphiteHost,
                             int graphitePort,
                             String zkConnect,
                             String topic,
                             String groupId,
                             int zkSessionTimeoutMs,
                             boolean readFromStartOfStream) {
    Properties props = new Properties();
    props.put("group.id", groupId);
    props.put("zookeeper.connect", zkConnect);
    props.put("auto.offset.reset", readFromStartOfStream ? "smallest" : "largest");
    props.put("zookeeper.session.timeout.ms", String.valueOf(zkSessionTimeoutMs));

    ConsumerConfig config = new ConsumerConfig(props);
    consumerConnector = Consumer.create(config);
    TopicFilter filterSpec = new Whitelist(topic);

    // String.format is static: writing "fmt".format(topic, zkConnect, groupId) actually calls
    // String.format(topic, zkConnect, groupId), i.e. it uses `topic` as the format string and
    // drops the message text. Format explicitly with the intended pattern instead.
    String consumerDescription =
            String.format("topic=%s for zk=%s and groupId=%s", topic, zkConnect, groupId);

    log.info("Trying to start consumer: " + consumerDescription);
    // A single stream is requested; head() takes the first element of the returned sequence.
    messageStream = consumerConnector.createMessageStreamsByFilter(filterSpec,
            1,
            new StringDecoder(null),
            new StringDecoder(null)).head();
    log.info("Started consumer: " + consumerDescription);
    graphiteClient = new GraphiteClient(graphiteHost, graphitePort);
}
 
Example 6
/**
 * Opens {@code numThreads} string-decoded streams for {@code topic} and submits one
 * {@code ConsumerKafkaStreamProcesser} per stream, numbered from 0, to a newly created
 * fixed-size thread pool.
 */
public void run() {
    // Step 1: request this.numThreads streams for the configured topic.
    final Map<String, Integer> streamsPerTopic = new HashMap<String, Integer>();
    streamsPerTopic.put(this.topic, this.numThreads);

    // Step 2: keys and values are both decoded as strings.
    final StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
    final StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());

    // Steps 3 and 4: open the streams and pick the ones that belong to our topic.
    // The returned map is keyed by topic; each value is that topic's list of stream
    // readers, sized by the count given for the topic in the map built in step 1.
    final List<KafkaStream<String, String>> topicStreams = this.consumer
            .createMessageStreams(streamsPerTopic, keyDecoder, valueDecoder)
            .get(this.topic);

    // Step 5: create the worker pool.
    this.executorPool = Executors.newFixedThreadPool(this.numThreads);

    // Step 6: one processor per stream; the list index doubles as the worker number.
    for (int workerId = 0; workerId < topicStreams.size(); workerId++) {
        this.executorPool.submit(
                new ConsumerKafkaStreamProcesser(topicStreams.get(workerId), workerId));
    }
}
 
Example 7
Source Project: dk-fitting   Source File: JavaKafkaConsumerHighAPIESImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the Kafka streams for {@code topic} ({@code numThreads} of them, string keys and
 * values) and starts a {@code ConsumerKafkaStreamProcesser} for each stream on a new
 * fixed-size thread pool. Processors are numbered consecutively starting at 0.
 */
public void run() {
    // 1. Declare the topic and how many streams to open for it.
    final Map<String, Integer> topicToStreamCount = new HashMap<String, Integer>();
    topicToStreamCount.put(this.topic, this.numThreads);

    // 2. Choose the decoders for message keys and values.
    final StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
    final StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());

    // 3. Open the streams.
    //    Key:   topic name.
    //    Value: the stream readers for that topic; their number is the count given
    //           for the topic in the map from step 1.
    final Map<String, List<KafkaStream<String, String>>> streamsByTopic =
            this.consumer.createMessageStreams(topicToStreamCount, keyDecoder, valueDecoder);

    // 4. Keep only the streams of our topic.
    final List<KafkaStream<String, String>> ownStreams = streamsByTopic.get(this.topic);

    // 5. Create the thread pool.
    this.executorPool = Executors.newFixedThreadPool(this.numThreads);

    // 6. Start one processor per stream.
    int nextThreadNumber = 0;
    for (KafkaStream<String, String> ownStream : ownStreams) {
        this.executorPool.submit(new ConsumerKafkaStreamProcesser(ownStream, nextThreadNumber++));
    }
}
 
Example 8
/**
 * Subscribes to {@code topic} with {@code numThreads} string-decoded streams and runs a
 * {@code ConsumerKafkaStreamProcesser} for every stream on a fresh fixed-size thread pool,
 * passing each processor its zero-based position in the stream list.
 */
public void run() {
    // (1) Topic and the number of streams wanted for it.
    final Map<String, Integer> requestedStreams = new HashMap<String, Integer>();
    requestedStreams.put(this.topic, this.numThreads);

    // (2) Decoders for keys and values.
    final StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
    final StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());

    // (3) Open the streams. The result maps a topic to its stream readers; the list
    //     size is the count given for that topic in requestedStreams.
    // (4) Only the readers for our topic are needed.
    final List<KafkaStream<String, String>> readers = this.consumer
            .createMessageStreams(requestedStreams, keyDecoder, valueDecoder)
            .get(this.topic);

    // (5) Thread pool that runs the processors.
    this.executorPool = Executors.newFixedThreadPool(this.numThreads);

    // (6) Submit one processor per reader.
    final int readerCount = readers.size();
    for (int position = 0; position < readerCount; position++) {
        final KafkaStream<String, String> reader = readers.get(position);
        this.executorPool.submit(new ConsumerKafkaStreamProcesser(reader, position));
    }
}
 
Example 9
Source Project: iotplatform   Source File: KafkaDemoClient.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a consumer connector configured from {@code consumerProperties()} and returns the
 * iterator of the single string-decoded stream opened for {@code topic}.
 *
 * @param topic topic to open one stream for
 * @return iterator over the first stream returned for {@code topic}
 */
private static ConsumerIterator<String, String> buildConsumer(String topic) {
    final Map<String, Integer> streamsPerTopic = new HashMap<>();
    streamsPerTopic.put(topic, 1);

    final ConsumerConnector connector =
            Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProperties()));

    final Map<String, List<KafkaStream<String, String>>> streamsByTopic =
            connector.createMessageStreams(streamsPerTopic, new StringDecoder(null), new StringDecoder(null));

    return streamsByTopic.get(topic).get(0).iterator();
}
 
Example 10
/**
 * Builds a direct Kafka input stream of {@code MessageAndMetadata<String, byte[]>} for the
 * configured topic, starting from the offsets returned by {@code KafkaManager.getOffsets}.
 *
 * @param streamingContext streaming context the stream is attached to
 * @return the created direct stream
 * @throws RuntimeException if {@code KafkaManager.topicExists} reports that the topic is missing
 */
public JavaInputDStream<MessageAndMetadata<String,byte[]>> buildInputDStream(
        JavaStreamingContext streamingContext) {

    final HashMap<String, String> params = config.getKafkaParams();

    // Class literals cannot carry type arguments, so go through Class<?> to obtain a
    // Class<MessageAndMetadata<String, byte[]>> token for the record type.
    @SuppressWarnings("unchecked")
    final Class<MessageAndMetadata<String, byte[]>> recordClass =
            (Class<MessageAndMetadata<String, byte[]>>) (Class<?>) MessageAndMetadata.class;

    final boolean topicPresent = KafkaManager.topicExists(config.getZkKafka(), config.getTopic());
    if (!topicPresent) {
        throw new RuntimeException("Topic does not exist on server");
    }

    final Map<TopicAndPartition, Long> startOffsets = KafkaManager.getOffsets(
            config.getZkKafka(),
            config.getZkOffsetManager(),
            config.getKafkaGroupId(),
            config.getTopic(),
            config.getKafkaParams());

    // TODO: try generics instead of the hard-coded key/value types and decoders.
    return org.apache.spark.streaming.kafka.KafkaUtils.createDirectStream(
            streamingContext,
            String.class,          // key type; change as necessary
            byte[].class,          // value type; change as necessary
            StringDecoder.class,
            DefaultDecoder.class,
            recordClass,
            params,
            startOffsets,
            Functions.<MessageAndMetadata<String, byte[]>>identity());
}
 
Example 11
Source Project: pulsar   Source File: HighLevelConsumerExample.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Connects a high-level consumer using the settings in {@code arguments}, opens two streams
 * for {@code arguments.topicName}, and logs each received tweet while counting it.
 *
 * <p>The outer loop repeats while fewer than {@code arguments.totalMessages} messages were
 * counted, or indefinitely when {@code totalMessages} is {@code -1}. Once the loop exits the
 * connector is shut down and the total is logged.
 *
 * @param arguments connection settings, topic name and message limit
 */
private static void consumeMessage(Arguments arguments) {

    final Properties consumerProps = new Properties();
    consumerProps.put("zookeeper.connect", arguments.serviceUrl);
    consumerProps.put("group.id", arguments.groupName);
    consumerProps.put("consumer.id", "cons1");
    consumerProps.put("auto.commit.enable", Boolean.toString(!arguments.autoCommitDisable));
    consumerProps.put("auto.commit.interval.ms", "100");
    consumerProps.put("queued.max.message.chunks", "100");

    final ConsumerConnector connector =
            Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProps));

    // Two streams are requested for the topic; values are decoded into Tweet objects.
    final Map<String, Integer> streamsPerTopic = Collections.singletonMap(arguments.topicName, 2);
    final Map<String, List<KafkaStream<String, Tweet>>> streamsByTopic =
            connector.createMessageStreams(streamsPerTopic, new StringDecoder(null), new Tweet.TestDecoder());

    int received = 0;
    while (arguments.totalMessages == -1 || received < arguments.totalMessages) {
        for (int pass = 0; pass < streamsByTopic.size(); pass++) {
            for (KafkaStream<String, Tweet> tweetStream : streamsByTopic.get(arguments.topicName)) {
                for (MessageAndMetadata<String, Tweet> tweetRecord : tweetStream) {
                    log.info("Received tweet: {}-{}", tweetRecord.message().userName, tweetRecord.message().message);
                    received++;
                }
            }
        }
    }

    connector.shutdown();

    log.info("successfully consumed message {}", received);
}
 
Example 12
Source Project: metrics-kafka   Source File: MetricsReportingSteps.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next message from a single string-decoded stream filtered to {@code topic}.
 *
 * <p>The former {@code topicCountMap} local was never passed to the consumer (the stream is
 * created from a {@code Whitelist} filter instead) and was built with the deprecated
 * {@code Integer(int)} constructor, so it has been removed.
 *
 * @param consumer connector used to create the filtered stream
 * @return the message of the next record delivered by the stream's iterator
 */
public String readMessage(ConsumerConnector consumer) {
    // One stream is requested, so the first element of the returned list is the only one.
    KafkaStream<String, String> messageStream = consumer.createMessageStreamsByFilter(new Whitelist(topic),
                                                                                      1,
                                                                                      new StringDecoder(null),
                                                                                      new StringDecoder(null)).get(0);

    return messageStream.iterator().next().message();
}
 
Example 13
Source Project: mod-kafka   Source File: KafkaMessageProcessorIT.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens one string-decoded stream for {@code KafkaProperties.DEFAULT_TOPIC} and starts a
 * thread named "kafkaMessageReceiverThread" that prints each received message and adds it
 * to {@code messagesReceived}; a {@code null} message is stored as {@code "<null>"}.
 */
private void consumeMessages() {
    final Map<String, Integer> streamsPerTopic = new HashMap<String, Integer>();
    streamsPerTopic.put(KafkaProperties.DEFAULT_TOPIC, 1);

    // One decoder instance is shared between keys and values.
    final StringDecoder stringDecoder = new StringDecoder(new VerifiableProperties());

    final ConsumerIterator<String, String> messages = consumer
            .createMessageStreams(streamsPerTopic, stringDecoder, stringDecoder)
            .get(KafkaProperties.DEFAULT_TOPIC)
            .get(0)
            .iterator();

    final Runnable receiveLoop = new Runnable() {
        @Override
        public void run() {
            while (messages.hasNext()) {
                final String payload = messages.next().message();
                final String msg = (payload != null) ? payload : "<null>";
                System.out.println("got message: " + msg);
                messagesReceived.add(msg);
            }
        }
    };

    new Thread(receiveLoop, "kafkaMessageReceiverThread").start();
}
 
Example 14
/**
 * Entry point of the real-time ad-click statistics job: builds a 5-second-batch streaming
 * context, reads the configured Kafka topics through the direct API, and wires up the
 * blacklist filtering plus the three statistics computations before starting the context.
 *
 * @param args command-line arguments; not used
 * @throws InterruptedException declared by the original signature
 */
public static void main(String[] args) throws InterruptedException {
    // Build the Spark configuration.
    // Tuning options that are currently left disabled:
    //   .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    //   .set("spark.default.parallelism", "1000")
    //   .set("spark.streaming.blockInterval", "50")
    //   .set("spark.streaming.receiver.writeAheadLog.enable", "true")
    SparkConf sparkConf = new SparkConf()
            .setMaster("local[2]")
            .setAppName("AdClickRealTimeStatSpark");

    // A Spark Streaming job is driven by a JavaStreamingContext rather than the
    // JavaSparkContext / SQLContext / HiveContext used elsewhere. Its first argument is
    // the usual SparkConf; the second is the batch interval, i.e. how often the data
    // collected from the source (Kafka) is cut into a batch and processed. Typical
    // values range from a few seconds to tens of seconds (rarely above one minute).
    // This project uses 5 seconds: every 5 seconds the job processes whatever arrived
    // from the source during the last 5 seconds.
    JavaStreamingContext streamingContext = new JavaStreamingContext(
            sparkConf, Durations.seconds(5));
    streamingContext.checkpoint("hdfs://120.77.155.220:9000/streaming_checkpoint");

    // The business logic starts here.

    // Create the input DStream (an abstraction of a never-ending data source) for Kafka
    // using the direct API, which among other benefits adapts the amount of data pulled
    // per batch on its own.

    // Kafka parameters: essentially the address list of the brokers to connect to.
    Map<String, String> brokerParams = new HashMap<String, String>();
    brokerParams.put("metadata.broker.list",
            ConfigurationManager.getProperty(Constants.KAFKA_METADATA_BROKER_LIST));

    // Topic set, parsed from a comma-separated configuration value.
    Set<String> topicNames = new HashSet<String>();
    for (String topicName : ConfigurationManager.getProperty(Constants.KAFKA_TOPICS).split(",")) {
        topicNames.add(topicName);
    }

    // Direct-API input DStream for the topics above. Each element is a pair: the first
    // value has no special meaning, the second holds one real-time log line from Kafka.
    JavaPairInputDStream<String, String> adRealTimeLogDStream = KafkaUtils.createDirectStream(
            streamingContext,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            brokerParams,
            topicNames);

    // Disabled: adRealTimeLogDStream.repartition(1000);

    // Drop the log lines that match the dynamic blacklist.
    JavaPairDStream<String, String> filteredAdRealTimeLogDStream =
            filterByBlacklist(adRealTimeLogDStream);

    // Update the dynamic blacklist from the filtered stream.
    generateDynamicBlacklist(filteredAdRealTimeLogDStream);

    // Feature 1 (coarsest): real-time ad click counts,
    // keyed as (yyyyMMdd_province_city_adid, clickCount).
    JavaPairDStream<String, Long> adRealTimeStatDStream = calculateRealTimeStat(
            filteredAdRealTimeLogDStream);

    // Feature 2 (somewhat finer): top-3 hottest ads per province per day, in real time.
    calculateProvinceTop3Ad(adRealTimeStatDStream);

    // Feature 3 (finest): per-ad click trend over a sliding window of the last hour,
    // i.e. the click count of each ad for every minute of the past hour.
    calculateAdClickCountByWindow(adRealTimeLogDStream);

    // With everything wired up: start the context, wait for termination, then close it.
    streamingContext.start();
    streamingContext.awaitTermination();
    streamingContext.close();
}
 
Example 15
/**
 * Word-count job over the "streamingtopic" Kafka topic: reads the topic through the direct
 * API in 5-second batches, splits each message with {@code SPACE}, and prints the per-batch
 * word counts.
 *
 * @param args expects at least four arguments (see the usage message); the values
 *             themselves are not read by this method
 * @throws Exception propagated from the streaming context
 */
public static void main(String[] args) throws Exception {
    final boolean enoughArguments = args.length >= 4;
    if (!enoughArguments) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf()
            .setMaster("local[2]")
            .setAppName("JavaKafkaWordCount");

    // 5-second batch interval: every 5 seconds the job processes the data received
    // from the source during the last 5 seconds.
    JavaStreamingContext streamingContext = new JavaStreamingContext(
            sparkConf, Durations.seconds(5));
    streamingContext.checkpoint("hdfs://Master:9000/streaming_checkpoint");

    // Create the Kafka input DStream (an abstraction of a never-ending data source) via
    // the direct API, which among other benefits adapts the amount pulled per batch.

    // Kafka parameters: essentially the address list of the brokers to connect to.
    Map<String, String> brokerParams = new HashMap<String, String>();
    brokerParams.put("metadata.broker.list",
            ConfigurationManager.getProperty(Constants.KAFKA_METADATA_BROKER_LIST));

    // Topic set; the topic list is hard-coded here instead of being read from configuration.
    Set<String> topicNames = new HashSet<String>();
    for (String topicName : "streamingtopic".split(",")) {
        topicNames.add(topicName);
    }

    // Each element of the stream is a pair: the first value has no special meaning,
    // the second holds one real-time log line from the Kafka topic.
    JavaPairInputDStream<String, String> adRealTimeLogDStream = KafkaUtils.createDirectStream(
            streamingContext,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            brokerParams,
            topicNames);

    // Message value -> words -> (word, 1) -> summed counts.
    JavaPairDStream<String, Integer> wordCounts = adRealTimeLogDStream
            .map(Tuple2::_2)
            .flatMap(line -> Arrays.asList(SPACE.split(line)).iterator())
            .mapToPair(word -> new Tuple2<>(word, 1))
            .reduceByKey((left, right) -> left + right);
    wordCounts.print();

    streamingContext.start();
    streamingContext.awaitTermination();
}
 
Example 16
Source Project: hermes   Source File: NativeKafkaWithStringDecoderTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Round-trip test: sends 200 string messages with a {@code KafkaProducer} and checks that a
 * high-level consumer using {@code StringDecoder} receives exactly the same sequence.
 */
@Test
public void testNative() throws IOException, InterruptedException, ExecutionException {
	final String topic = "kafka.SimpleTextTopic";
	final int msgNum = 200;
	// Released once per consumed message; the test waits on it before comparing results.
	final CountDownLatch remaining = new CountDownLatch(msgNum);

	// Producer settings.
	final Properties producerProps = new Properties();
	producerProps.put("bootstrap.servers", "");
	producerProps.put("value.serializer", StringSerializer.class.getCanonicalName());
	producerProps.put("key.serializer", StringSerializer.class.getCanonicalName());

	// Consumer settings.
	final Properties consumerProps = new Properties();
	consumerProps.put("zookeeper.connect", "");
	consumerProps.put("group.id", "GROUP_" + topic);

	final List<String> received = new ArrayList<String>();
	final List<String> sent = new ArrayList<String>();

	final ConsumerConnector consumerConnector =
	      Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProps));
	final Map<String, Integer> streamsPerTopic = new HashMap<String, Integer>();
	streamsPerTopic.put(topic, 1);
	final List<KafkaStream<String, String>> topicStreams = consumerConnector
	      .createMessageStreams(streamsPerTopic, new StringDecoder(null), new StringDecoder(null))
	      .get(topic);

	// One reader thread per stream collects the messages as they arrive.
	for (final KafkaStream<String, String> topicStream : topicStreams) {
		final Thread reader = new Thread() {
			@Override
			public void run() {
				for (MessageAndMetadata<String, String> record : topicStream) {
					try {
						System.out.println("received: " + record.message());
						received.add(record.message());
						remaining.countDown();
					} catch (Exception e) {
						e.printStackTrace();
					}
				}
			}
		};
		reader.start();
	}

	final KafkaProducer<String, String> producer = new KafkaProducer<String, String>(producerProps);
	for (int seq = 0; seq < msgNum; seq++) {
		final ProducerRecord<String, String> outgoing =
		      new ProducerRecord<String, String>(topic, "test-message" + seq);
		final Future<RecordMetadata> pending = producer.send(outgoing);
		// Block until the broker acknowledged the record before recording it as sent.
		pending.get();
		if (pending.isDone()) {
			System.out.println("sending: " + outgoing.value());
			sent.add(outgoing.value());
		}
	}

	remaining.await();

	Assert.assertArrayEquals(sent.toArray(), received.toArray());

	consumerConnector.shutdown();
	producer.close();
}
 
Example 17
Source Project: SparkDemo   Source File: JavaDirectKafkaWordCount.java    License: MIT License 4 votes vote down vote up
/**
 * Word count over one or more Kafka topics using the direct stream API with 2-second batches.
 *
 * @param args {@code args[0]} is the broker list, {@code args[1]} a comma-separated topic list
 * @throws Exception propagated from the streaming context
 */
public static void main(String[] args) throws Exception {
  final boolean enoughArguments = args.length >= 2;
  if (!enoughArguments) {
    System.err.println("Usage: JavaDirectKafkaWordCount <brokers> <topics>\n" +
        "  <brokers> is a list of one or more Kafka brokers\n" +
        "  <topics> is a list of one or more kafka topics to consume from\n\n");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  final String brokerList = args[0];
  final String topicList = args[1];

  // Streaming context with a 2-second batch interval.
  final SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
  final JavaStreamingContext streamingContext =
      new JavaStreamingContext(sparkConf, Durations.seconds(2));

  final Set<String> topicNames = new HashSet<>(Arrays.asList(topicList.split(",")));
  final Map<String, String> brokerParams = new HashMap<>();
  brokerParams.put("metadata.broker.list", brokerList);

  // Direct Kafka stream for the given brokers and topics.
  final JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
      streamingContext,
      String.class,
      String.class,
      StringDecoder.class,
      StringDecoder.class,
      brokerParams,
      topicNames);

  // Keep only the message value of each (key, value) pair.
  final Function<Tuple2<String, String>, String> takeValue =
      new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> record) {
          return record._2();
        }
      };
  final JavaDStream<String> lines = messages.map(takeValue);

  // Split every line into words.
  final FlatMapFunction<String, String> splitIntoWords = new FlatMapFunction<String, String>() {
    @Override
    public Iterator<String> call(String line) {
      return Arrays.asList(SPACE.split(line)).iterator();
    }
  };
  final JavaDStream<String> words = lines.flatMap(splitIntoWords);

  // Pair every word with 1, then sum the ones per word.
  final PairFunction<String, String, Integer> pairWithOne =
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String word) {
          return new Tuple2<>(word, 1);
        }
      };
  final Function2<Integer, Integer, Integer> add = new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer left, Integer right) {
      return left + right;
    }
  };
  final JavaPairDStream<String, Integer> wordCounts = words.mapToPair(pairWithOne).reduceByKey(add);
  wordCounts.print();

  // Start the computation and block until it terminates.
  streamingContext.start();
  streamingContext.awaitTermination();
}
 
Example 18
Source Project: SparkDemo   Source File: JavaKafkaDirectWordCount.java    License: MIT License 4 votes vote down vote up
/**
 * Word count over the "2017-7-26" Kafka topic using the direct stream API, which the
 * original author characterises as:
 * 1. one-to-one,
 * 2. efficient,
 * 3. computing each record exactly once.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    StreamingExamples.setStreamingLogLevels();
    final SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaDirectWordCount").setMaster("local[1]");
    final JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(6));

    // Kafka parameters: where the brokers are.
    final Map<String, String> brokerParams = new HashMap<String, String>();
    brokerParams.put("metadata.broker.list", "master:9092,slave1:9092,slave2:9092");

    // The topic to read from.
    final HashSet<String> topicNames = new HashSet<String>();
    topicNames.add("2017-7-26");

    // Direct Kafka stream for the brokers and topic above.
    final JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
            streamingContext,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            brokerParams,
            topicNames);

    // Keep only the message value of each (key, value) pair.
    final Function<Tuple2<String, String>, String> takeValue =
            new Function<Tuple2<String, String>, String>() {
                @Override
                public String call(Tuple2<String, String> record) {
                    return record._2();
                }
            };
    final JavaDStream<String> lines = messages.map(takeValue);

    // Split every line into words.
    final FlatMapFunction<String, String> splitIntoWords = new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String line) {
            return Lists.newArrayList(SPACE.split(line)).iterator();
        }
    };
    final JavaDStream<String> words = lines.flatMap(splitIntoWords);

    // Pair every word with 1, then sum the ones per word.
    final PairFunction<String, String, Integer> pairWithOne =
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) {
                    return new Tuple2<String, Integer>(word, 1);
                }
            };
    final Function2<Integer, Integer, Integer> add = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    };
    final JavaPairDStream<String, Integer> wordCounts = words.mapToPair(pairWithOne).reduceByKey(add);

    wordCounts.print();
    streamingContext.start();
    try {
        streamingContext.awaitTermination();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example 19
/**
 * Streams log lines from the "iplog" Kafka topic in 2-second batches, keeps the lines whose
 * IP range is reported as fraudulent by {@code CacheIPLookup}, and saves them as text files
 * with the prefix "FraudRecord".
 *
 * @param args command-line arguments; not used
 * @throws Exception propagated from the streaming context
 */
public static void main(String[] args) throws Exception {

    String brokers = "localhost:9092";
    String topics = "iplog";
    CacheIPLookup cacheIPLookup = new CacheIPLookup();
    SparkConf sparkConf = new SparkConf().setAppName("IP_FRAUD");
    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    Map<String, String> kafkaConfiguration = new HashMap<>();
    kafkaConfiguration.put("metadata.broker.list", brokers);
    kafkaConfiguration.put("group.id", "ipfraud");
    kafkaConfiguration.put("auto.offset.reset", "smallest");

    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
            javaStreamingContext,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaConfiguration,
            topicsSet
    );

    // Only the message value is needed.
    JavaDStream<String> lines = messages.map(Tuple2::_2);

    JavaDStream<String> fraudIPs = lines.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String s) throws Exception {
            // The first space-separated field is treated as the IP address. A line made
            // only of spaces splits into an empty array, so guard the index instead of
            // letting the lookup crash the batch.
            String[] fields = s.split(" ");
            String ip = fields.length > 0 ? fields[0] : "";
            String[] parts = ip.split("\\.");

            // The lookup key is the first two dot-separated parts. Lines without two
            // parts keep the previous fallback of looking up a null range, but this is
            // now an explicit check rather than a swallowed ArrayIndexOutOfBoundsException.
            String range = parts.length >= 2 ? parts[0] + "." + parts[1] : null;
            return cacheIPLookup.isFraudIP(range);
        }
    });

    DStream<String> fraudDstream = fraudIPs.dstream();
    fraudDstream.saveAsTextFiles("FraudRecord", "");

    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
 
Example 20
/**
 * Word count over the "test1" Kafka topic using the direct stream API with 2-second batches;
 * the per-batch counts are printed.
 *
 * @param args command-line arguments; not used
 * @throws Exception propagated from the streaming context
 */
public static void main(String[] args) throws Exception {
    final String brokerList = "localhost:9092";
    final String topicList = "test1";

    final SparkConf sparkConf = new SparkConf().setMaster("local").setAppName("DirectKafkaWordCount");
    final JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    final Set<String> topicNames = new HashSet<>(Arrays.asList(topicList.split(",")));

    final Map<String, String> kafkaSettings = new HashMap<>();
    kafkaSettings.put("metadata.broker.list", brokerList);
    kafkaSettings.put("group.id", "stream_test8");
    kafkaSettings.put("auto.offset.reset", "smallest");

    final JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
            streamingContext,
            String.class,
            String.class,
            StringDecoder.class,
            StringDecoder.class,
            kafkaSettings,
            topicNames);

    // Message value -> words -> (word, 1) -> summed counts.
    final JavaDStream<String> lines = messages.map(Tuple2::_2);
    final JavaDStream<String> words =
            lines.flatMap(line -> Arrays.asList(SPACE.split(line)).iterator());
    final JavaPairDStream<String, Integer> wordCounts = words
            .mapToPair(word -> new Tuple2<>(word, 1))
            .reduceByKey((left, right) -> left + right);

    // The counts are only printed; saving them to HDFS is currently disabled.
    wordCounts.print();
    streamingContext.start();
    streamingContext.awaitTermination();
}
 
Example 21
Source Project: blog_demos   Source File: MessageServiceImpl.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates the Kafka consumer connector, opens {@code THREAD_NUM} string-decoded streams for
 * {@code TOPIC}, and submits one {@code Processer} per stream (numbered from 0) to a newly
 * built thread pool.
 */
@PostConstruct
public void init(){
    logger.info("start init kafka consumer service");

    // Step 1: create the Kafka connector.
    consumer = Consumer.createJavaConsumerConnector(createConsumerConfig(ZK, GROUP_ID));

    final Map<String, Integer> streamsPerTopic = new HashMap<String, Integer>();
    streamsPerTopic.put(TOPIC, THREAD_NUM);

    // Step 2: keys and values are both decoded as strings.
    final StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
    final StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());

    // Steps 3 and 4: open the streams and keep those of our topic. The returned map is
    // keyed by topic; each value is that topic's list of stream readers, sized by the
    // count given for the topic in streamsPerTopic.
    final List<KafkaStream<String, String>> topicStreams = this.consumer
            .createMessageStreams(streamsPerTopic, keyDecoder, valueDecoder)
            .get(TOPIC);

    logger.info("streams size {}", topicStreams.size());

    // Step 5: fixed-size pool (core == max == THREAD_NUM) with an unbounded work queue,
    // a custom thread factory and the abort rejection policy.
    this.executorPool = new ThreadPoolExecutor(
            THREAD_NUM,
            THREAD_NUM,
            0,
            TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<Runnable>(),
            new CustomThreadFactory(),
            new ThreadPoolExecutor.AbortPolicy());

    // Step 6: one processor per stream; the list index doubles as the worker number.
    for (int workerId = 0; workerId < topicStreams.size(); workerId++) {
        this.executorPool.submit(new Processer(topicStreams.get(workerId), workerId));
    }

    logger.info("end init kafka consumer service");
}
 
Example 22
Source Project: SparkToParquet   Source File: AppMain.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads Apache access-log lines from Kafka, parses them into {@code ApacheAccessLog}
 * records and appends every batch to a partitioned Parquet data set.
 *
 * @param args command-line arguments, parsed through {@code Flags}
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
	Flags.setFromCommandLineArgs(THE_OPTIONS, args);

	// Spark contexts: core, streaming and SQL.
	SparkConf sparkConf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
	JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
	JavaStreamingContext jssc = new JavaStreamingContext(sparkContext, Flags.getInstance().getSlideInterval());
	SQLContext sqlContext = new SQLContext(sparkContext);

	// Kafka parameters: comma-separated topic list and broker list.
	String[] topicNames = Flags.getInstance().getKafka_topic().split(",");
	HashSet<String> topicsSet = new HashSet<>(Arrays.asList(topicNames));
	HashMap<String, String> kafkaParams = new HashMap<>();
	kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

	// Direct stream of (key, value) string pairs from Kafka.
	JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
			jssc,
			String.class, String.class,
			StringDecoder.class, StringDecoder.class,
			kafkaParams,
			topicsSet);

	// Only the message value (the raw log line) is of interest.
	JavaDStream<String> lines = messages.map(record -> record._2());

	// Parse each line; a line that fails with a RuntimeException yields no record.
	JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
		List<ApacheAccessLog> parsed = new ArrayList<>();
		try {
			parsed.add(ApacheAccessLog.parseFromLogLine(line));
		} catch (RuntimeException ignored) {
			// Unparseable line: fall through and return the empty list.
		}
		return parsed;
	}).cache();

	accessLogsDStream.foreachRDD(batch -> {
		// Convert the batch to a DataFrame and append it to the Parquet output.
		DataFrame frame = sqlContext.createDataFrame(batch, ApacheAccessLog.class);
		frame.write()
				.partitionBy("ipAddress", "method", "responseCode")
				.mode(SaveMode.Append)
				.parquet(Flags.getInstance().getParquetFile());

		return null;
	});

	// Start the computation and wait until it terminates.
	jssc.start();
	jssc.awaitTermination();
}
 
Example 23
Source Project: iot-traffic-monitor   Source File: IoTDataProcessor.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds and starts the IoT traffic streaming job: reads {@code IoTData} records from
 * Kafka, keeps the records of vehicles not yet marked as processed, and feeds the
 * resulting streams to {@code IoTTrafficDataProcessor}.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if configuration loading or the streaming context fails
 */
public static void main(String[] args) throws Exception {
    // Application properties drive the Spark, Cassandra and Kafka configuration.
    Properties appProps = PropertyFileReader.readPropertyFile();

    SparkConf sparkConf = new SparkConf()
            .setAppName(appProps.getProperty("com.iot.app.spark.app.name"))
            .setMaster(appProps.getProperty("com.iot.app.spark.master"))
            .set("spark.cassandra.connection.host", appProps.getProperty("com.iot.app.cassandra.host"))
            .set("spark.cassandra.connection.port", appProps.getProperty("com.iot.app.cassandra.port"))
            .set("spark.cassandra.connection.keep_alive_ms", appProps.getProperty("com.iot.app.cassandra.keep_alive"));

    // Five-second batches, with checkpointing enabled.
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(5));
    jssc.checkpoint(appProps.getProperty("com.iot.app.spark.checkpoint.dir"));

    // Kafka connection settings and the single topic to read.
    Map<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("zookeeper.connect", appProps.getProperty("com.iot.app.kafka.zookeeper"));
    kafkaParams.put("metadata.broker.list", appProps.getProperty("com.iot.app.kafka.brokerlist"));
    String topic = appProps.getProperty("com.iot.app.kafka.topic");
    Set<String> topicsSet = new HashSet<String>();
    topicsSet.add(topic);

    // Direct Kafka stream: String keys, IoTData values decoded by IoTDataDecoder.
    JavaPairInputDStream<String, IoTData> directKafkaStream = KafkaUtils.createDirectStream(
            jssc,
            String.class,
            IoTData.class,
            StringDecoder.class,
            IoTDataDecoder.class,
            kafkaParams,
            topicsSet);
    logger.info("Starting Stream Processing");

    // Unfiltered values; used later for the POI traffic calculation.
    JavaDStream<IoTData> nonFilteredIotDataStream = directKafkaStream.map(record -> record._2());

    // Key by vehicle id and keep one record per vehicle in each batch.
    JavaPairDStream<String, IoTData> keyedByVehicle = nonFilteredIotDataStream
            .mapToPair(iot -> new Tuple2<String, IoTData>(iot.getVehicleId(), iot));
    JavaPairDStream<String, IoTData> iotDataPairStream = keyedByVehicle.reduceByKey((first, second) -> first);

    // Track per-vehicle state; the state times out after 3600 seconds (one hour).
    JavaMapWithStateDStream<String, IoTData, Boolean, Tuple2<IoTData, Boolean>> iotDStreamWithStatePairs =
            iotDataPairStream.mapWithState(
                    StateSpec.function(processedVehicleFunc).timeout(Durations.seconds(3600)));

    // Keep only the tuples whose flag is FALSE (vehicle not processed yet).
    JavaDStream<Tuple2<IoTData, Boolean>> stateTuples = iotDStreamWithStatePairs.map(pair -> pair);
    JavaDStream<Tuple2<IoTData, Boolean>> filteredIotDStreams =
            stateTuples.filter(pair -> pair._2.equals(Boolean.FALSE));

    // Drop the flag again to get back to plain IoTData.
    JavaDStream<IoTData> filteredIotDataStream = filteredIotDStreams.map(pair -> pair._1);

    // Cached because both the total and the window computation consume it.
    filteredIotDataStream.cache();

    IoTTrafficDataProcessor iotTrafficProcessor = new IoTTrafficDataProcessor();
    iotTrafficProcessor.processTotalTrafficData(filteredIotDataStream);
    iotTrafficProcessor.processWindowTrafficData(filteredIotDataStream);

    // Point of interest: coordinates plus a radius of 30 (km, per the original comment).
    POIData poiData = new POIData();
    poiData.setLatitude(33.877495);
    poiData.setLongitude(-95.50238);
    poiData.setRadius(30);

    // Broadcast the POI with the route and vehicle type to monitor (trucks on Route-37).
    Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues =
            jssc.sparkContext().broadcast(new Tuple3<>(poiData, "Route-37", "Truck"));
    iotTrafficProcessor.processPOIData(nonFilteredIotDataStream, broadcastPOIValues);

    // Start the job and block until it terminates.
    jssc.start();
    jssc.awaitTermination();
}
 
Example 24
Source Project: pulsar   Source File: ConsumerIterator.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the next message, wrapped together with its topic, partition, offset and the
 * deserialized key and value.
 *
 * <p>A message is taken from {@code receivedMessages} when one is queued; otherwise the
 * call falls back to {@code consumer.receive()}. When {@code isAutoCommit} is set, the
 * returned message is acknowledged cumulatively before the method returns.
 *
 * @return the next message with its metadata
 * @throws RuntimeException if receiving from the consumer fails; the underlying
 *         {@code PulsarClientException} is attached as the cause
 */
@SuppressWarnings("unchecked")
@Override
public PulsarMessageAndMetadata<K, V> next() {

    Message<byte[]> msg = receivedMessages.poll();
    if (msg == null) {
        try {
            msg = consumer.receive();
        } catch (PulsarClientException e) {
            log.warn("Failed to receive message for {}-{}, {}", consumer.getTopic(), consumer.getSubscription(),
                    e.getMessage(), e);
            // Keep the original exception as the cause so callers can see why the receive failed.
            throw new RuntimeException(
                    "failed to receive message from " + consumer.getTopic() + "-" + consumer.getSubscription(), e);
        }
    }

    int partition = TopicName.getPartitionIndex(consumer.getTopic());
    long offset = MessageIdUtils.getOffset(msg.getMessageId());
    String key = msg.getKey();
    byte[] value = msg.getValue();

    K desKey = null;
    V desValue = null;

    if (StringUtils.isNotBlank(key)) {
        if (keyDeSerializer.isPresent() && keyDeSerializer.get() instanceof StringDecoder) {
            // With a StringDecoder the message key is used as it is.
            desKey = (K) key;
        } else {
            // Otherwise the key is Base64-decoded and the bytes are handed to the
            // configured decoder, or to the default decoder when none is configured.
            byte[] decodedBytes = Base64.getDecoder().decode(key);
            desKey = keyDeSerializer.isPresent() ? keyDeSerializer.get().fromBytes(decodedBytes)
                    : (K) DEFAULT_DECODER.fromBytes(decodedBytes);
        }
    }

    if (value != null) {
        desValue = valueDeSerializer.isPresent() ? valueDeSerializer.get().fromBytes(msg.getData())
                : (V) DEFAULT_DECODER.fromBytes(msg.getData());
    }

    PulsarMessageAndMetadata<K, V> msgAndMetadata = new PulsarMessageAndMetadata<>(consumer.getTopic(), partition,
            null, offset, keyDeSerializer.orElse(null), valueDeSerializer.orElse(null), desKey, desValue);

    if (isAutoCommit) {
        // Commit the offset of previously dequeued messages
        consumer.acknowledgeCumulativeAsync(msg);
    }

    lastConsumedMessageId = msg.getMessageId();
    return msgAndMetadata;
}
 
Example 25
Source Project: pulsar   Source File: KafkaProducerConsumerTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Round-trip test: publishes ten {@code Tweet} messages with a
 * {@code PulsarKafkaProducer} configured with custom serializer classes, reads them
 * back through two consumer streams, and checks that every record carries the expected
 * key and that the set of received tweets matches the set of published ones.
 */
@Test
public void testPulsarKafkaProducerWithSerializer() throws Exception {
    final String serviceUrl = lookupUrl.toString();
    final String topicName = "persistent://my-property/my-ns/my-topic1";

    // (1) Create consumer
    Properties properties = new Properties();
    properties.put("zookeeper.connect", serviceUrl);
    properties.put("group.id", "group1");
    properties.put("consumer.id", "cons1");
    properties.put("auto.commit.enable", "true");
    properties.put("auto.commit.interval.ms", "100");
    properties.put("queued.max.message.chunks", "100");

    ConsumerConfig conSConfig = new ConsumerConfig(properties);
    ConsumerConnector connector = new ConsumerConnector(conSConfig);
    // Request two streams for the topic; keys are decoded as strings, values by TestDecoder.
    Map<String, Integer> topicCountMap = Collections.singletonMap(topicName, 2);
    Map<String, List<PulsarKafkaStream<String, Tweet>>> streams = connector.createMessageStreams(topicCountMap,
            new StringDecoder(null), new TestDecoder());

    // (2) Create producer
    Properties properties2 = new Properties();
    properties2.put(BROKER_URL, serviceUrl);
    properties2.put(PRODUCER_TYPE, "sync");
    properties2.put(SERIALIZER_CLASS, TestEncoder.class.getName());
    properties2.put(KEY_SERIALIZER_CLASS, StringEncoder.class.getName());
    properties2.put(PARTITIONER_CLASS, TestPartitioner.class.getName());
    properties2.put(COMPRESSION_CODEC, "gzip"); // compression codec (NOTE(review): original comment said ZLIB - confirm mapping)
    properties2.put(QUEUE_ENQUEUE_TIMEOUT_MS, "-1"); // block queue if full => -1 = true
    properties2.put(QUEUE_BUFFERING_MAX_MESSAGES, "6000"); // queue max message
    properties2.put(QUEUE_BUFFERING_MAX_MS, "100"); // batch delay
    properties2.put(BATCH_NUM_MESSAGES, "500"); // batch msg
    properties2.put(CLIENT_ID, "test");
    ProducerConfig config = new ProducerConfig(properties2);
    PulsarKafkaProducer<String, Tweet> producer = new PulsarKafkaProducer<>(config);

    // (3) Publish `total` tweets, all under the same key, remembering what was sent.
    String name = "user";
    String msg = "Hello World!";
    Set<Tweet> published = Sets.newHashSet();
    Set<Tweet> received = Sets.newHashSet();
    int total = 10;
    for (int i = 0; i < total; i++) {
        String sendMessage = msg + i;
        Tweet tweet = new Tweet(name, sendMessage);
        KeyedMessage<String, Tweet> message = new KeyedMessage<>(topicName, name, tweet);
        published.add(tweet);
        producer.send(message);
    }
    // (4) Keep draining the streams until `total` distinct tweets have been collected.
    while (received.size() < total) {
        // The index i is not used in the body; each pass reads the streams of topicName.
        for (int i = 0; i < streams.size(); i++) {
            List<PulsarKafkaStream<String, Tweet>> kafkaStreams = streams.get(topicName);
            assertEquals(kafkaStreams.size(), 2);
            for (PulsarKafkaStream<String, Tweet> kafkaStream : kafkaStreams) {
                for (PulsarMessageAndMetadata<String, KafkaProducerConsumerTest.Tweet> record : kafkaStream) {
                    received.add(record.message());
                    // Every message was sent with `name` as its key.
                    assertEquals(record.key(), name);
                }
            }
        }
    }
    // (5) Same number of tweets, and nothing published is missing from the received set.
    assertEquals(published.size(), received.size());
    published.removeAll(received);
    assertTrue(published.isEmpty());
}
 
Example 26
Source Project: streams   Source File: KafkaPersistReader.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Opens a Kafka consumer connector, creates one message stream for the topics matching
 * the configured topic filter, and submits a {@code KafkaPersistReaderTask} per stream
 * to the executor.
 */
@Override
public void startStream() {

  Properties consumerProps = new Properties();
  consumerProps.setProperty("serializer.encoding", "UTF8");

  consumerConnector = Consumer.createJavaConsumerConnector(new ConsumerConfig(consumerProps));

  // The configured topic is used as a whitelist filter.
  Whitelist topicFilter = new Whitelist(config.getTopic());

  // Key and value decoders are both built from the same consumer properties.
  VerifiableProperties decoderProps = new VerifiableProperties(consumerProps);
  StringDecoder keyDecoder = new StringDecoder(decoderProps);
  StringDecoder valueDecoder = new StringDecoder(decoderProps);

  inStreams = consumerConnector.createMessageStreamsByFilter(topicFilter, 1, keyDecoder, valueDecoder);

  // One reader task per stream.
  for (final KafkaStream stream : inStreams) {
    executor.submit(new KafkaPersistReaderTask(this, stream));
  }

}