org.apache.kafka.streams.kstream.KStreamBuilder Java Examples

The following examples show how to use org.apache.kafka.streams.kstream.KStreamBuilder. They are taken from open-source projects; the originating project and source file are noted above each example. Note that KStreamBuilder belongs to the pre-1.0 Kafka Streams API and has since been deprecated in favor of org.apache.kafka.streams.StreamsBuilder, so some calls shown below (countByKey, ZOOKEEPER_CONNECT_CONFIG, and similar) compile only against those older client versions.
Example #1
Source File: KafkaStreamWordCount.java    From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License
public static void main(String[] args) throws Exception {
    Properties kafkaStreamProperties = new Properties();
    kafkaStreamProperties.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-stream-wordCount");
    kafkaStreamProperties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    kafkaStreamProperties.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    kafkaStreamProperties.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    kafkaStreamProperties.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

    Serde<String> stringSerde = Serdes.String();
    Serde<Long> longSerde = Serdes.Long();

    KStreamBuilder streamTopology = new KStreamBuilder();
    KStream<String, String> topicRecords = streamTopology.stream(stringSerde, stringSerde, "input");
    KStream<String, Long> wordCounts = topicRecords
            .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
            .map((key, word) -> new KeyValue<>(word, word))
            .countByKey("Count")
            .toStream();
    wordCounts.to(stringSerde, longSerde, "wordCount");

    KafkaStreams streamManager = new KafkaStreams(streamTopology, kafkaStreamProperties);
    streamManager.start();

    Runtime.getRuntime().addShutdownHook(new Thread(streamManager::close));
}
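
The counts land on the "wordCount" topic keyed by word with Long values, so any consumer of that topic has to pair a String key deserializer with a Long value deserializer. A minimal verification consumer could be sketched as follows; the group id and poll loop are illustrative and not part of the original project.

import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;

public class WordCountReader {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("group.id", "wordcount-reader"); // illustrative group id
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer",
                "org.apache.kafka.common.serialization.LongDeserializer");

        try (KafkaConsumer<String, Long> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singleton("wordCount"));
            while (true) {
                for (ConsumerRecord<String, Long> record : consumer.poll(500)) {
                    System.out.println(record.key() + " -> " + record.value());
                }
            }
        }
    }
}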
 
Example #2
Source File: IPFraudKafkaStreamApp.java    From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License
public static void main(String[] args) throws Exception {
    Properties kafkaStreamProperties = new Properties();
    kafkaStreamProperties.put(StreamsConfig.APPLICATION_ID_CONFIG, "IP-Fraud-Detection");
    kafkaStreamProperties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    kafkaStreamProperties.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    kafkaStreamProperties.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    kafkaStreamProperties.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

    Serde<String> stringSerde = Serdes.String();

    KStreamBuilder fraudDetectionTopology = new KStreamBuilder();

    KStream<String, String> ipRecords = fraudDetectionTopology.stream(stringSerde, stringSerde, propertyReader.getPropertyValue("topic"));

    KStream<String, String> fraudIpRecords = ipRecords
            .filter((k, v) -> isFraud(v));

    fraudIpRecords.to(propertyReader.getPropertyValue("output_topic"));

    KafkaStreams streamManager = new KafkaStreams(fraudDetectionTopology, kafkaStreamProperties);
    streamManager.start();

    Runtime.getRuntime().addShutdownHook(new Thread(streamManager::close));
}
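
The isFraud(...) helper and propertyReader come from elsewhere in the book's project and are not shown above. Purely as a hypothetical illustration, a lookup-based check against known-bad IP prefixes might look like the sketch below; the record layout and the blacklist are assumptions, not the book's actual implementation.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

// Hypothetical sketch: flags a record whose source IP falls in a known-bad /24.
// Assumes the record value is comma-separated with the IP in the first field.
public class FraudCheck {

    private static final Set<String> FRAUD_PREFIXES =
            new HashSet<>(Arrays.asList("10.12.14", "192.168.44")); // assumed blacklist

    public static boolean isFraud(String record) {
        String ip = record.split(",")[0].trim();
        int lastDot = ip.lastIndexOf('.');
        return lastDot > 0 && FRAUD_PREFIXES.contains(ip.substring(0, lastDot));
    }
}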
 
Example #3
Source File: MainVerticle.java    From kiqr with Apache License 2.0
@Override
public void start(Future<Void> startFuture) throws Exception {
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "kiqr");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.StringSerde.class);
    props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.LongSerde.class);
    props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "0");

    KStreamBuilder builder = new KStreamBuilder();
    KTable<String, Long> table = builder.table(Serdes.String(), Serdes.Long(), "visits", "visitStore");
    KTable<Windowed<String>, Long> windowedCount = table.toStream().groupByKey().count(TimeWindows.of(60), "visitCount");

    vertx.deployVerticle(RestKiqrServerVerticle.Builder.serverBuilder(builder, props).withPort(2901).build(), res -> {
        if (res.succeeded()) {
            startFuture.complete();
        } else {
            startFuture.fail(res.cause());
        }
    });
}
 
Example #4
Source File: GeoLocationStreams.java    From Microservices-Deployment-Cookbook with MIT License
@PostConstruct
public void init() {
	Map<String, Object> props = new HashMap<>();
	props.put(StreamsConfig.APPLICATION_ID_CONFIG, "geolocation-application");
	props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.99.100:9092");
	props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
	props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, GeoLocationSerdes.class.getName());

	StreamsConfig config = new StreamsConfig(props);
	KStreamBuilder builder = new KStreamBuilder();

	builder.stream("geolocationStreams").filter(new Predicate<Object, Object>() {
		@Override
		public boolean test(Object key, Object value) {
			GeoLocation geolocation = (GeoLocation) value;
			System.out.println("Stream received => " + value);
			return geolocation.getLatitude() >= -90 
					&& geolocation.getLatitude() < 90 
					&& geolocation.getLongitude() >= -180 
					&& geolocation.getLongitude() < 180;
		}
	}).to("geolocations");

	KafkaStreams streams = new KafkaStreams(builder, config);
	streams.start();
}
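
GeoLocationSerdes is registered by class name through VALUE_SERDE_CLASS_CONFIG, which means Kafka Streams instantiates it reflectively: the class must implement Serde<GeoLocation> and expose a public no-arg constructor. A hypothetical skeleton, delegating to serializer/deserializer classes assumed to exist elsewhere in the project:

import java.util.Map;

import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serializer;

// Hypothetical skeleton: a Serde configured via VALUE_SERDE_CLASS_CONFIG is
// created reflectively, so it needs a public no-arg constructor.
public class GeoLocationSerdes implements Serde<GeoLocation> {

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) { }

    @Override
    public void close() { }

    @Override
    public Serializer<GeoLocation> serializer() {
        return new GeoLocationSerializer();   // assumed project class
    }

    @Override
    public Deserializer<GeoLocation> deserializer() {
        return new GeoLocationDeserializer(); // assumed project class
    }
}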
 
Example #5
Source File: KafkaStreamingMain.java    From kafka-streams-api-websockets with Apache License 2.0
public static void main(String[] args) throws Exception {

        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streaming-example");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1500);

//        To get data produced before process started
//        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
//        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);

        KStreamBuilder builder = new KStreamBuilder();

        KStream<String, String> source = builder.stream("data-in");

        KStream<String, String> stats = source.groupByKey()
                .aggregate(KafkaStreamingStatistics::new,
                    (k, v, clusterstats) -> clusterstats.add(v),
                    TimeWindows.of(60000).advanceBy(10000),
                    Serdes.serdeFrom(new MySerde(), new MySerde()),
                    "data-store")
                .toStream((key, value) -> key.key().toString() + " " + key.window().start())
                .mapValues((job) -> job.computeAvgTime().toString());

        stats.to(Serdes.String(), Serdes.String(),  "data-out");

        KafkaStreams streams = new KafkaStreams(builder, props);

        streams.cleanUp();
        streams.start();

        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }
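
MySerde and KafkaStreamingStatistics are project classes not reproduced here; all the aggregate needs is a Serializer/Deserializer pair wrapped by Serdes.serdeFrom. A hypothetical JSON-backed equivalent is sketched below; Jackson is an assumed dependency, not necessarily what the project actually uses.

import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;

// Hypothetical JSON-backed stand-in for MySerde. One class covers both halves
// of Serdes.serdeFrom(serializer, deserializer).
public class JsonPojoSerde<T> implements Serializer<T>, Deserializer<T> {

    private final ObjectMapper mapper = new ObjectMapper();
    private final Class<T> type;

    public JsonPojoSerde(Class<T> type) {
        this.type = type;
    }

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) { }

    @Override
    public byte[] serialize(String topic, T data) {
        try {
            return data == null ? null : mapper.writeValueAsBytes(data);
        } catch (Exception e) {
            throw new RuntimeException("JSON serialization failed", e);
        }
    }

    @Override
    public T deserialize(String topic, byte[] bytes) {
        try {
            return bytes == null ? null : mapper.readValue(bytes, type);
        } catch (Exception e) {
            throw new RuntimeException("JSON deserialization failed", e);
        }
    }

    @Override
    public void close() { }
}

With such a class, the serde passed to aggregate(...) could be built as Serdes.serdeFrom(new JsonPojoSerde<>(KafkaStreamingStatistics.class), new JsonPojoSerde<>(KafkaStreamingStatistics.class)).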
 
Example #6
Source File: KafkaStreamsLiveTest.java    From tutorials with MIT License
@Test
@Ignore("it needs to have kafka broker running on local")
public void shouldTestKafkaStreams() throws InterruptedException {
    //given
    String inputTopic = "inputTopic";

    Properties streamsConfiguration = new Properties();
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-live-test");
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Use a temporary directory for storing state, which will be automatically removed after the test.
    streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());

    //when
    KStreamBuilder builder = new KStreamBuilder();
    KStream<String, String> textLines = builder.stream(inputTopic);
    Pattern pattern = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);

    KTable<String, Long> wordCounts = textLines
            .flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase())))
            .groupBy((key, word) -> word)
            .count();

    wordCounts.foreach((word, count) -> System.out.println("word: " + word + " -> " + count));

    String outputTopic = "outputTopic";
    final Serde<String> stringSerde = Serdes.String();
    final Serde<Long> longSerde = Serdes.Long();
    wordCounts.to(stringSerde, longSerde, outputTopic);

    KafkaStreams streams = new KafkaStreams(builder, streamsConfiguration);
    streams.start();

    //then
    Thread.sleep(30000);
    streams.close();
}
 
Example #7
Source File: ExclamationKafkaStream.java    From kafka-streams-ex with MIT License
/** Connects the topic "console" with the topic "exclaimed", adding two
   *  exclamation points to the input values.
   * 
   * @param args Not used.
   */
  public static void main(String[] args) {
      
      // Configuration stuff.
      Properties config = new Properties();
     
      // For the cluster. Assumes everything is local.
      config.put(StreamsConfig.APPLICATION_ID_CONFIG,
          "exclamation-kafka-streams");
      config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
      config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");

      // Serde.
      config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
          Serdes.ByteArray().getClass().getName());
      config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
          Serdes.String().getClass().getName());
      
      KStreamBuilder builder = new KStreamBuilder();
      
      // Read the stream from the topic into a KStream.
      KStream<byte[], String> text = builder.stream("console");
      
      // Apply the transformation.
      KStream<byte[], String> exclamation = 
          text.mapValues(x -> x + "!")
              .mapValues(x -> x + "!");
      
      // Sink it. Uses the configured serializers.
      exclamation.to("exclamated");
      
      // Build and run.
      KafkaStreams streams = new KafkaStreams(builder, config);
      
      streams.start();
  }
 
Example #8
Source File: StocksKafkaStreamsDriver.java    From kafka-streams with Apache License 2.0
public static void main(String[] args) {

        StreamsConfig streamingConfig = new StreamsConfig(getProperties());

        JsonSerializer<StockTransactionCollector> stockTransactionsSerializer = new JsonSerializer<>();
        JsonDeserializer<StockTransactionCollector> stockTransactionsDeserializer = new JsonDeserializer<>(StockTransactionCollector.class);
        JsonDeserializer<StockTransaction> stockTxnDeserializer = new JsonDeserializer<>(StockTransaction.class);
        JsonSerializer<StockTransaction> stockTxnJsonSerializer = new JsonSerializer<>();
        Serde<StockTransaction> transactionSerde = Serdes.serdeFrom(stockTxnJsonSerializer,stockTxnDeserializer);
        StringSerializer stringSerializer = new StringSerializer();
        StringDeserializer stringDeserializer = new StringDeserializer();
        Serde<String> stringSerde = Serdes.serdeFrom(stringSerializer,stringDeserializer);
        Serde<StockTransactionCollector> collectorSerde = Serdes.serdeFrom(stockTransactionsSerializer,stockTransactionsDeserializer);
        WindowedSerializer<String> windowedSerializer = new WindowedSerializer<>(stringSerializer);
        WindowedDeserializer<String> windowedDeserializer = new WindowedDeserializer<>(stringDeserializer);
        Serde<Windowed<String>> windowedSerde = Serdes.serdeFrom(windowedSerializer,windowedDeserializer);

        KStreamBuilder kStreamBuilder = new KStreamBuilder();


        KStream<String,StockTransaction> transactionKStream =  kStreamBuilder.stream(stringSerde,transactionSerde,"stocks");

        transactionKStream.map((k,v)-> new KeyValue<>(v.getSymbol(),v))
                          .through(stringSerde, transactionSerde,"stocks-out")
                          .groupBy((k,v) -> k, stringSerde, transactionSerde)
                          .aggregate(StockTransactionCollector::new,
                               (k, v, stockTransactionCollector) -> stockTransactionCollector.add(v),
                               TimeWindows.of(10000),
                               collectorSerde, "stock-summaries")
                .to(windowedSerde,collectorSerde,"transaction-summary");


        System.out.println("Starting StockStreams Example");
        KafkaStreams kafkaStreams = new KafkaStreams(kStreamBuilder,streamingConfig);
        kafkaStreams.start();
        System.out.println("Now started StockStreams Example");

    }
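
getProperties() is not part of this excerpt. Because the topology passes explicit serdes at every step, the configuration plausibly needs little beyond an application id and broker list; a sketch under that assumption (the id and address are placeholders):

// Hypothetical reconstruction of getProperties(); the application id and
// broker address are assumptions consistent with the other examples here.
private static Properties getProperties() {
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "stocks-streams-example");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    return props;
}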
 
Example #9
Source File: TwitterStreamsAnalyzer.java    From kafka-streams with Apache License 2.0
public void run()  {
    StreamsConfig streamsConfig = new StreamsConfig(getProperties());

    JsonSerializer<Tweet> tweetJsonSerializer = new JsonSerializer<>();
    JsonDeserializer<Tweet> tweetJsonDeserializer = new JsonDeserializer<>(Tweet.class);
    Serde<Tweet> tweetSerde = Serdes.serdeFrom(tweetJsonSerializer, tweetJsonDeserializer);

    KStreamBuilder kStreamBuilder = new KStreamBuilder();

    Classifier classifier = new Classifier();
    classifier.train(new File("src/main/resources/kafkaStreamsTwitterTrainingData_clean.csv"));

    KeyValueMapper<String, Tweet, String> languageToKey = (k, v) ->
        StringUtils.isNotBlank(v.getText()) ? classifier.classify(v.getText()) : "unknown";

    Predicate<String, Tweet> isEnglish = (k, v) -> k.equals("english");
    Predicate<String, Tweet> isFrench =  (k, v) -> k.equals("french");
    Predicate<String, Tweet> isSpanish = (k, v) -> k.equals("spanish");

    KStream<String, Tweet> tweetKStream = kStreamBuilder.stream(Serdes.String(), tweetSerde, "twitterData");

    KStream<String, Tweet>[] filteredStreams = tweetKStream.selectKey(languageToKey).branch(isEnglish, isFrench, isSpanish);

    filteredStreams[0].to(Serdes.String(), tweetSerde, "english");
    filteredStreams[1].to(Serdes.String(), tweetSerde, "french");
    filteredStreams[2].to(Serdes.String(), tweetSerde, "spanish");

    kafkaStreams = new KafkaStreams(kStreamBuilder, streamsConfig);
    System.out.println("Starting twitter analysis streams");
    kafkaStreams.start();
    System.out.println("Started");

}
 
Example #10
Source File: CommandProcessor.java    From cqrs-manager-for-distributed-reactive-services with Apache License 2.0
public void start() {
    KStreamBuilder builder = new KStreamBuilder();

    Serde<UUID> keySerde = new FressianSerde();
    Serde<Map> valSerde = new FressianSerde();

    KStream<UUID, Map> commands = builder.stream(keySerde, valSerde, commandsTopic);
    KStream<UUID, Map> customerEvents = commands
            .filter((id, command) -> command.get(new Keyword("action")).equals(new Keyword("create-customer")))
            .map((id, command) -> {
                logger.debug("Command received");
                Map userEvent = new HashMap(command);
                userEvent.put(new Keyword("action"), new Keyword("customer-created"));
                userEvent.put(new Keyword("parent"), id);
                Map userValue = (Map) userEvent.get(new Keyword("data"));
                userValue.put(new Keyword("id"), UUID.randomUUID());
                return new KeyValue<>(UUID.randomUUID(), userEvent);
    }).through(keySerde, valSerde, eventsTopic);

    KStream<UUID, Map> customers = customerEvents
            .map((id, event) -> {
                Map customer = (Map) event.get(new Keyword("data"));
                UUID customerId = (UUID) customer.get(new Keyword("id"));
                return new KeyValue<UUID, Map>(customerId, customer);
            });

    customers.through(keySerde, valSerde, customersTopic);

    StateStoreSupplier store = Stores.create("Customers")
            .withKeys(keySerde)
            .withValues(valSerde)
            .persistent()
            .build();
    builder.addStateStore(store);

    customers.process(customerStore, "Customers");

    this.kafkaStreams = new KafkaStreams(builder, kafkaStreamsConfig);
    this.kafkaStreams.start();
}
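
customerStore is a ProcessorSupplier defined elsewhere in the project; customers.process(customerStore, "Customers") attaches its processor to the state store registered on the builder above. A hypothetical processor consistent with that wiring:

import java.util.Map;
import java.util.UUID;

import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;

// Hypothetical shape of the processor behind customerStore: it upserts each
// customer record into the "Customers" store registered via addStateStore.
public class CustomerStoreProcessor extends AbstractProcessor<UUID, Map> {

    private KeyValueStore<UUID, Map> store;

    @Override
    @SuppressWarnings("unchecked")
    public void init(ProcessorContext context) {
        super.init(context);
        store = (KeyValueStore<UUID, Map>) context.getStateStore("Customers");
    }

    @Override
    public void process(UUID customerId, Map customer) {
        store.put(customerId, customer); // keep the latest state per customer
    }
}

Under this assumption, customerStore would simply be CustomerStoreProcessor::new.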
 
Example #11
Source File: WikipediaStreamDemo.java    From hello-kafka-streams with Apache License 2.0 5 votes vote down vote up
private static KafkaStreams createWikipediaStreamsInstance(String bootstrapServers) {
    final Serializer<JsonNode> jsonSerializer = new JsonSerializer();
    final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer();
    final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer);

    KStreamBuilder builder = new KStreamBuilder();
    Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wikipedia-streams");
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);


    KStream<JsonNode, JsonNode> wikipediaRaw = builder.stream(jsonSerde, jsonSerde, "wikipedia-raw");

    KStream<String, WikipediaMessage> wikipediaParsed =
            wikipediaRaw.map(WikipediaMessage::parceIRC)
                    .filter(WikipediaMessage::filterNonNull)
                    .through(Serdes.String(), new JsonPOJOSerde<>(WikipediaMessage.class), "wikipedia-parsed");

    KTable<String, Long> totalEditsByUser = wikipediaParsed
            .filter((key, value) -> value.type == WikipediaMessage.Type.EDIT)
            .countByKey(Serdes.String(), "wikipedia-edits-by-user");

    //some print
    totalEditsByUser.toStream().process(() -> new AbstractProcessor<String, Long>() {
        @Override
        public void process(String user, Long numEdits) {
            System.out.println("USER: " + user + " num.edits: " + numEdits);
        }
    });

    return new KafkaStreams(builder, props);

}
 
Example #12
Source File: Stream.java    From hdinsight-kafka-java-get-started with MIT License
public static void main( String[] args ) {
    Properties streamsConfig = new Properties();
    // The name must be unique on the Kafka cluster
    streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-example");
    // Brokers
    streamsConfig.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, args[0]);
    // Zookeeper
    //streamsConfig.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, args[1]);
    // SerDes for key and values
    streamsConfig.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamsConfig.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

    // Serdes for the word and count
    Serde<String> stringSerde = Serdes.String();
    Serde<Long> longSerde = Serdes.Long();

    KStreamBuilder builder = new KStreamBuilder();
    KStream<String, String> sentences = builder.stream(stringSerde, stringSerde, "test");
    KStream<String, Long> wordCounts = sentences
            .flatMapValues(value -> Arrays.asList(value.toLowerCase().split("\\W+")))
            .map((key, word) -> new KeyValue<>(word, word))
            .countByKey("Counts")
            .toStream();
    wordCounts.to(stringSerde, longSerde, "wordcounts");

    KafkaStreams streams = new KafkaStreams(builder, streamsConfig);
    streams.start();

    Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
}
 
Example #13
Source File: GenericHttpServerTest.java    From kiqr with Apache License 2.0
@Test
public void builderWithConfig(){
    RestKiqrServerVerticle.Builder builder = RestKiqrServerVerticle.Builder.serverBuilder(new KStreamBuilder(), new Properties());

    AbstractVerticle serverVerticle = builder.withOptions(new HttpServerOptions().setPort(4711)).build();
    assertThat(serverVerticle, is(instanceOf(RestKiqrServerVerticle.class)));

    RestKiqrServerVerticle server = (RestKiqrServerVerticle) serverVerticle;
    assertThat(server.serverOptions.getPort(), is(equalTo(4711)));

}
 
Example #14
Source File: GenericHttpServerTest.java    From kiqr with Apache License 2.0
@Test
public void builderWithPort(){
    RestKiqrServerVerticle.Builder builder = RestKiqrServerVerticle.Builder.serverBuilder(new KStreamBuilder(), new Properties());

    AbstractVerticle serverVerticle = builder.withPort(4711).build();
    assertThat(serverVerticle, is(instanceOf(RestKiqrServerVerticle.class)));

    RestKiqrServerVerticle server = (RestKiqrServerVerticle) serverVerticle;
    assertThat(server.serverOptions.getPort(), is(equalTo(4711)));

}
 
Example #15
Source File: StatKStreamBuilderSupplier.java    From DBus with Apache License 2.0
@Override
public KStreamBuilder get() {
    KStreamBuilder builder = new KStreamBuilder();
    KStream<String, String> stream = builder.stream(sources.toArray(new String[0]));

    KStream<HBKeySupplier.HBKey, String>[] streams =
            stream.filter((k, v) -> StringUtils.startsWith(v, "data_increment_heartbeat"))
                    .selectKey((k, v) -> new HBKeySupplier(k).get())
                    .filter((k, v) -> k.isNormalFormat)
                    .flatMapValues(v -> Arrays.asList("stat", "monitor"))
                    .branch((k, v) -> StringUtils.equalsIgnoreCase("stat", v),
                            (k, v) -> StringUtils.equalsIgnoreCase("monitor", v));

    streams[0].transform(StatTransformer::new).to(sink);
    KStream<String, PacketVo> monitor =
            streams[1].filterNot((k, v) -> !StringUtils.equalsIgnoreCase("abort", k.getStatus()))
                    .map((k, v) -> {
                        StringJoiner joiner = new StringJoiner("/");
                        joiner.add(HeartBeatConfigContainer.getInstance().getHbConf().getMonitorPath())
                                .add(k.getDs())
                                .add(StringUtils.equalsIgnoreCase(DbusDatasourceType.DB2.name(), k.getDbType()) ? StringUtils.upperCase(k.getSchema()) : k.getSchema())
                                .add(k.getTable()).add(String.valueOf(k.getPartition()));

                        String node = joiner.toString();
                        PacketVo packet = new PacketVo();
                        packet.setNode(node);
                        packet.setType("checkpoint");
                        packet.setTime(k.getCheckpointMs());
                        packet.setTxTime(k.getTxTimeMs());
                        return new KeyValue<>(node, packet);
                    });

    // Repartition (shuffle) first so that records with the same key end up in the same partition
    monitor.through("monitor-repartition")
            .reduceByKey((agg, v) -> v.getTime() > agg.getTime() ? v : agg, TimeWindows.of("monitor", 2 * 60 * 1000))
            .toStream()
            .map((k, v) -> new KeyValue<>(k.key(), v))
            .process(new MonitorProcessorSupplier(), "zkInfo");

    return builder;
}
 
Example #16
Source File: SpecificClientIntegrationITCase.java    From kiqr with Apache License 2.0
@BeforeClass
public static void produceMessages() throws Exception{
    Properties producerProps = new Properties();
    producerProps.put("bootstrap.servers", KAFKA_HOST + ":" + KAFKA_PORT);
    producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    producerProps.put("value.serializer", "org.apache.kafka.common.serialization.LongSerializer");
    producerProps.put("linger.ms", 0);


    try(KafkaProducer<String, Long> producer = new KafkaProducer<>(producerProps)){
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 0L, "key1", 1L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 100L, "key1", 2L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 100000L, "key1", 3L));


        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 0L, "key2", 4L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 100000L, "key2", 5L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 100001L, "key2", 6L));

        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 0L, "key3", 7L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 50000L, "key3", 8L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 100001L, "key3", 9L));


        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 0L, "key4", 10L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 1L, "key4", 11L));
        producer.send(new ProducerRecord<String, Long>(TOPIC, 0, 2L, "key4", 12L));

    }

    CountDownLatch cdl = new CountDownLatch(12);


    Properties consumerProps = new Properties();
    consumerProps.put("bootstrap.servers",  KAFKA_HOST + ":" + KAFKA_PORT);
    consumerProps.put("group.id", UUID.randomUUID().toString());
    consumerProps.put("enable.auto.commit", "true");
    consumerProps.put("auto.offset.reset", "earliest");
    consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.LongDeserializer");


    Runnable consumerRunnable = () -> {
        KafkaConsumer<String, Long> consumer = new KafkaConsumer<>(consumerProps);

        consumer.subscribe(Collections.singleton(TOPIC));

        int tryCount = 10;
        while(true){
            ConsumerRecords<String, Long> records = consumer.poll(500);
            records.forEach(rec -> cdl.countDown());

            tryCount--;
            if(cdl.getCount() == 0){
                consumer.close();
                return;
            } else if(tryCount == 0){
                throw new RuntimeException("time's up");
            }
        }
    };

    consumerRunnable.run();

    cdl.await(10000, TimeUnit.MILLISECONDS);


    KStreamBuilder builder = new KStreamBuilder();
    KTable<String, Long> kv = builder.table(Serdes.String(), Serdes.Long(), TOPIC, "kv");

    kv.toStream().groupByKey().count(TimeWindows.of(10000L), "window");

    Properties streamProps = new Properties();
    streamProps.put(StreamsConfig.APPLICATION_ID_CONFIG, UUID.randomUUID().toString());
    streamProps.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG,  KAFKA_HOST + ":" + KAFKA_PORT);
    streamProps.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
    streamProps.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    streamProps.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());

    CountDownLatch streamCdl = new CountDownLatch(2);


    RestKiqrServerVerticle.Builder verticleBuilder = RestKiqrServerVerticle.Builder.serverBuilder(builder, streamProps);
    RuntimeVerticle.Builder builder1 = verticleBuilder.withPort(44321).withStateListener((newState, oldState) -> {
        if (newState == KafkaStreams.State.RUNNING) streamCdl.countDown();
        System.out.println(oldState + " - " + newState);
    });

    AbstractVerticle verticle = verticleBuilder.build();

    CountDownLatch verticleCdl = new CountDownLatch(1);
    VERTX.deployVerticle(verticle, handler -> {
        verticleCdl.countDown();
    });

    streamCdl.await(100000, TimeUnit.MILLISECONDS);
    verticleCdl.await(100000, TimeUnit.MILLISECONDS);

}
 
Example #17
Source File: KStreamBuilderSmokeTest.java    From kafka-junit with BSD 3-Clause "New" or "Revised" License
/**
 * Integration test validates that streams can be used against KafkaTestServer.
 */
@Test
void testStreamConsumer() throws Exception {
    // Define topic to test with.
    final String inputTopic = "stream-input-topic" + System.currentTimeMillis();
    final String outputTopic = "stream-output-topic" + System.currentTimeMillis();

    // Define how many records
    final int numberOfRecords = 25;
    final int partitionId = 0;

    // Tracks how many records the Stream consumer has processed.
    final AtomicInteger recordCounter = new AtomicInteger(0);

    // Create our test server instance.
    try (final KafkaTestServer kafkaTestServer = new KafkaTestServer()) {
        // Start it and create our topic.
        kafkaTestServer.start();

        // Create test utils instance.
        final KafkaTestUtils kafkaTestUtils = new KafkaTestUtils(kafkaTestServer);

        // Create topics
        kafkaTestUtils.createTopic(inputTopic, 1, (short) 1);
        kafkaTestUtils.createTopic(outputTopic, 1, (short) 1);

        // Produce random data into input topic
        kafkaTestUtils.produceRecords(numberOfRecords, inputTopic, partitionId);

        // Define stream consumer properties.
        final Properties config = new Properties();
        config.put(StreamsConfig.APPLICATION_ID_CONFIG, "testStreamProcessor");
        config.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaTestServer.getKafkaConnectString());
        config.put("group.id", "test-stream-group");
        config.put("auto.offset.reset", "earliest");

        // Serialization definition.
        final Serde<String> stringSerde = Serdes.String();

        // Build the stream
        final KStreamBuilder kStreamBuilder = new KStreamBuilder();
        kStreamBuilder
            // Read from input topic.
            .stream(stringSerde, stringSerde, inputTopic)

            // For each record processed, increment our counter
            .map((key, word) -> {
                recordCounter.incrementAndGet();
                return new KeyValue<>(word, word);
            })

            // Write to output topic.
            .to(stringSerde, stringSerde, outputTopic);

        // Create stream
        final KafkaStreams kafkaStreams = new KafkaStreams(kStreamBuilder, new StreamsConfig(config));
        try {
            // Start the stream consumer
            kafkaStreams.start();

            // Since stream processing is async, we need to wait for the Stream processor to start, consume messages
            // from the input topic, and process them. We'll wait up to 10 seconds for it to do its thing.
            for (int timeoutCounter = 0; timeoutCounter <= 10; timeoutCounter++) {
                // If we've processed all of our records
                if (recordCounter.get() >= numberOfRecords) {
                    // Break out of sleep loop.
                    break;
                }
                // Otherwise, we need to wait longer, sleep 1 second.
                Thread.sleep(1000L);
            }
        } finally {
            // Close the stream consumer.
            kafkaStreams.close();
        }

        // Validation.
        Assertions.assertEquals(numberOfRecords, recordCounter.get(), "Should have 25 records processed");

        // Consume records from output topic.
        final List<ConsumerRecord<String, String>> outputRecords =
            kafkaTestUtils.consumeAllRecordsFromTopic(outputTopic, StringDeserializer.class, StringDeserializer.class);

        // Validate we got the correct number of records.
        Assertions.assertEquals(numberOfRecords, outputRecords.size());
    }
}
 
Example #18
Source File: TumblingWindowKafkaStream.java    From kafka-streams-ex with MIT License
/** Runs the streams program, writing to the "long-counts-all" topic.
 *
 * @param args Not used.
 */
public static void main(String[] args) throws Exception {
    
    Properties config = new Properties();

    config.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "tumbling-window-kafka-streams");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG,
        "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG,
        "localhost:2181");
    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.ByteArray().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.Long().getClass().getName());

    KStreamBuilder builder = new KStreamBuilder();

    KStream<byte[], Long> longs = builder.stream(
        Serdes.ByteArray(), Serdes.Long(), "longs");

    // The tumbling windows will clear every ten seconds.
    KTable<Windowed<byte[]>, Long> longCounts =
        longs.groupByKey()
             .count(TimeWindows.of(10000L)
                               .until(10000L),
                    "long-counts");

    // Write to topics.
    longCounts.toStream((k,v) -> k.key())
              .to(Serdes.ByteArray(),
                  Serdes.Long(),
                  "long-counts-all");

    KafkaStreams streams = new KafkaStreams(builder, config);
    streams.start();

    // Now generate the data and write to the topic.
    Properties producerConfig = new Properties();
    producerConfig.put("bootstrap.servers", "localhost:9092");
    producerConfig.put("key.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.ByteArraySerializer");
    producerConfig.put("value.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.LongSerializer");

    KafkaProducer producer = 
        new KafkaProducer<byte[], Long>(producerConfig);

    Random rng = new Random(12345L);

    while(true) { 
        producer.send(new ProducerRecord<byte[], Long>(
            "longs", "A".getBytes(), rng.nextLong()%10));
        Thread.sleep(500L);
    } // Close infinite data generating loop.
}
 
Example #19
Source File: ExclamationAdvancedKafkaStream.java    From kafka-streams-ex with MIT License
/** Connects the topic "console" to two topics, adds 2-4 exclamation points,
 *  writing all messages to the "exclamated" topic and the messages with
 *  four exclamation points to the "much-exclamated" topic.
 * 
 * @param args Not used. 
 */
public static void main(String[] args) {
    
    // Configuration stuff.
    Properties config = new Properties();
    
    // For the cluster. Assumes everything is local.
    config.put(StreamsConfig.APPLICATION_ID_CONFIG, 
        "exclamation-advanced-kafka-streams");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    
    // Serde.
    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.ByteArray().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.String().getClass().getName());
    
    KStreamBuilder builder = new KStreamBuilder();
    
    // Read the stream from the topic into a KStream.
    KStream<byte[], String> text = builder.stream("console");
    
    // Apply the transformations.
    KStream<byte[], String> exclamation = 
        text.mapValues(x -> x + getExclamations())
            .mapValues(x -> x + getExclamations());
    
    KStream<byte[], String> muchExclamation = 
        exclamation.filter((k,v) -> v.endsWith("!!!!"));
    
    // Sink them both.
    exclamation.to("exclamated");
    muchExclamation.to("much-exclamated");
    
    // Build and run.
    KafkaStreams streams = new KafkaStreams(builder, config);
    
    streams.start();
    
}
 
Example #20
Source File: HoppingWindowKafkaStream.java    From kafka-streams-ex with MIT License 4 votes vote down vote up
/** Runs the streams program, writing to the "long-counts-all" topic.
 *
 * @param args Not used.
 */
public static void main(String[] args) throws Exception{
    
    Properties config = new Properties();
    
    config.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "hopping-window-kafka-streams");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");
    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.ByteArray().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.String().getClass().getName());
    
    KStreamBuilder builder = new KStreamBuilder();
    
    KStream<byte[], Long> longs = builder.stream(
        Serdes.ByteArray(), Serdes.Long(), "longs");
    
    // The hopping windows will count the last second, two seconds,
    // three seconds, etc until the last ten seconds of data are in the
    // windows.
    KTable<Windowed<byte[]>, Long> longCounts = 
        longs.groupByKey()
              .count(TimeWindows.of(10000L)
                                .advanceBy(1000L)
                                .until(10000L),
                     "long-counts");
                                    
    // Write to output topic.
    longCounts.toStream((k,v) -> k.key())
              .map((k,v) -> KeyValue.pair(k, v))
              .to(Serdes.ByteArray(),
                  Serdes.Long(),
                  "long-counts-all");
    
    KafkaStreams streams = new KafkaStreams(builder, config);
    streams.start();
    
    // Now generate the data and write to the topic.
    Properties producerConfig = new Properties();
    producerConfig.put("bootstrap.servers", "localhost:9092");
    producerConfig.put("key.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.ByteArraySerializer");
    producerConfig.put("value.serializer",
                       "org.apache.kafka.common" +
                       ".serialization.LongSerializer");
    KafkaProducer producer = 
        new KafkaProducer<byte[], Long>(producerConfig);
    
    Random rng = new Random(12345L);
    
    while(true) {
        producer.send(new ProducerRecord<byte[], Long>(
            "longs", "A".getBytes(), rng.nextLong()%10));
        Thread.sleep(500L);
    } // Close infinite loop generating data.
}
 
Example #21
Source File: KTableKafkaStream.java    From kafka-streams-ex with MIT License
/** Runs the streams program (which produces its own data), writing
 *  to the "longs-table", "longs-table-out", "longs-stream-out" topics.
 *
 * @param args Not used.
 */
public static void main(String[] args) throws Exception {
    
    Properties config = new Properties();

    config.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "ktable-kafka-stream");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");

    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG,
        Serdes.String().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG,
        Serdes.Long().getClass().getName());

    KStreamBuilder builder = new KStreamBuilder();
    
    KTable<String, Long> longs_table = builder.table("longs-table");

    longs_table.to("longs-table-out");

    // Convert to a stream and output to see what happens.
    longs_table.toStream().to("longs-stream-out");

    KafkaStreams streams = new KafkaStreams(builder, config);
    streams.start();

    Properties producerConfig = new Properties();
    producerConfig.put("bootstrap.servers", "localhost:9092");
    producerConfig.put("key.serializer", 
        "org.apache.kafka.common.serialization.StringSerializer");
    producerConfig.put("value.serializer",
        "org.apache.kafka.common.serialization.LongSerializer");

    KafkaProducer<String, Long> producer = 
        new KafkaProducer<String, Long>(producerConfig);

    Random rng = new Random(12345L);
    String[] keys = {"A"}; // Can change to make a more complicated example.
    Long[] values = {1L, 2L, 3L};

    try {

        while(true) {
            String key = keys[rng.nextInt(keys.length)];
            Long value = values[rng.nextInt(values.length)];
            producer.send(
                new ProducerRecord<String, Long>("longs-table", 
                                                 key, 
                                                 value));
            Thread.sleep(1000L);
        } // Close while loop for generating the data. 
    
    } catch(InterruptedException e) {
        producer.close();
    } // Close try/catch around data production.
}
 
Example #22
Source File: NotLookingAtFacebook.java    From kafka-streams-ex with MIT License
/** Runs the streams program, writing to the "notifications" and 
 *  "metrics" topics.
 *
 * @param args Not used.
 */
public static void main(String[] args) throws Exception {
    
    Properties config = new Properties();

    config.put(StreamsConfig.APPLICATION_ID_CONFIG,
        "not-looking-at-facebook");
    config.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    config.put(StreamsConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181");

    config.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, 
        Serdes.String().getClass().getName());
    config.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, 
        Serdes.String().getClass().getName());

    Properties producerConfig = new Properties();
    producerConfig.put("bootstrap.servers", "localhost:9092");
    producerConfig.put("key.serializer",
        "org.apache.kafka.common.serialization.StringSerializer");
    producerConfig.put("value.serializer",
        "org.apache.kafka.common.serialization.StringSerializer");

    String[] users = {"Doyin", "George", "Mark"};
    
    // Build the topology.
    KStreamBuilder builder = new KStreamBuilder();

    KTable<String, String> logons = builder.table("logons");
    KStream<String, String> ticks = builder.stream("ticks");

    KStream<String, String> notifications = 
        ticks.leftJoin(logons, (nv, lv) -> new String[] {nv, lv})
             // Filter out any nulls.
             .filter((k,v) -> v[1] != null)
             // Filter out anyone who's logged on.
             .filter((k,v) -> !"LOGON".equals(v[1]))
             // Now set the message.
             .mapValues(v -> "You are not currently viewing Facebook.");

    // Implement the metrics.
    KTable<Windowed<String>, Long> notificationCounts = 
        notifications.countByKey(
            // Create a one minute window.
            TimeWindows.of("notificationCounts", 60000L)
                        // Hop by ten seconds.
                       .advanceBy(10000L)
                       // Don't hang on to old values.
                       .until(60000L));

    // Convert notificationCounts to a stream, extract the key (ignore
    // the embedded time information), and sink to the "metrics" topic.
    notificationCounts.toStream((k,v) -> k.key())
                      .to(Serdes.String(),
                          Serdes.Long(),
                          "metrics");

    // Nuisance delivered. You're welcome.
    notifications.to("notifications");

    // Start producing logon messages.
    new Thread(new LogonGenerator(users, producerConfig)).start();

    // Start producing notifications.
    new Thread(new TickGenerator(users, producerConfig)).start();

    // Start the streams.
    KafkaStreams streams = new KafkaStreams(builder, config);
    streams.start();

}
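
LogonGenerator and TickGenerator are helpers from the project that are not shown above. As a hypothetical sketch, the tick side only needs to publish a periodic record per user to the "ticks" topic; LogonGenerator would look similar, writing "LOGON"/"LOGOFF" values to the "logons" topic instead.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

// Hypothetical shape of TickGenerator: one tick per user per second, so the
// stream-table join in the topology fires regularly.
public class TickGenerator implements Runnable {

    private final String[] users;
    private final Properties producerConfig;

    public TickGenerator(String[] users, Properties producerConfig) {
        this.users = users;
        this.producerConfig = producerConfig;
    }

    @Override
    public void run() {
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(producerConfig)) {
            while (!Thread.currentThread().isInterrupted()) {
                for (String user : users) {
                    producer.send(new ProducerRecord<>("ticks", user, "tick"));
                }
                Thread.sleep(1000L);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}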
 
Example #23
Source File: RestKiqrServerVerticle.java    From kiqr with Apache License 2.0
public static Builder serverBuilder(KStreamBuilder builder) {
    return new Builder(builder);
}
 
Example #24
Source File: RestKiqrServerVerticle.java    From kiqr with Apache License 2.0
public static Builder serverBuilder(KStreamBuilder builder, Properties props) {
    return new Builder(builder, props);
}
 
Example #25
Source File: RegexTest.java    From kafka-streams with Apache License 2.0
public static void main(String[] args) {

        StreamsConfig streamingConfig = new StreamsConfig(getProperties());
        KStreamBuilder kStreamBuilder = new KStreamBuilder();


        KStream<String, String> patternStreamI = kStreamBuilder.stream(Serdes.String(), Serdes.String(), Pattern.compile("topic-\\d"));
        KStream<String, String> namedTopicKStream = kStreamBuilder.stream(Serdes.String(), Serdes.String(), "topic-Z");
        KStream<String, String> patternStreamII = kStreamBuilder.stream(Serdes.String(), Serdes.String(), Pattern.compile("topic-[A-Y]+"));

        patternStreamI.print("pattern-\\d");
        namedTopicKStream.print("topic-Z");
        patternStreamII.print("topic-[A-Y]+");


        System.out.println("Starting stream regex consumer Example");
        KafkaStreams kafkaStreams = new KafkaStreams(kStreamBuilder, streamingConfig);
        kafkaStreams.start();


    }
 
Example #26
Source File: RestKiqrServerVerticle.java    From kiqr with Apache License 2.0
protected Builder(KStreamBuilder builder) {
    super(builder);
}
 
Example #27
Source File: RestKiqrServerVerticle.java    From kiqr with Apache License 2.0
protected Builder(KStreamBuilder builder, Properties properties) {
    super(builder, properties);
}
 
Example #28
Source File: WordCountExample.java    From kafka-streams-wordcount with Apache License 2.0
public static void main(String[] args) throws Exception{

        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

        // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
        // Note: To re-run the demo, you need to use the offset reset tool:
        // https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        // work-around for an issue around timing of creating internal topics
        // Fixed in Kafka 0.10.2.0
        // don't use in large production apps - this increases network load
        // props.put(CommonClientConfigs.METADATA_MAX_AGE_CONFIG, 500);

        KStreamBuilder builder = new KStreamBuilder();

        KStream<String, String> source = builder.stream("wordcount-input");


        final Pattern pattern = Pattern.compile("\\W+");
        KStream<String, String> counts = source.flatMapValues(value -> Arrays.asList(pattern.split(value.toLowerCase())))
                .map((key, value) -> new KeyValue<>(value, value))
                .filter((key, value) -> (!value.equals("the")))
                .groupByKey()
                .count("CountStore").mapValues(value -> Long.toString(value)).toStream();
        counts.to("wordcount-output");

        KafkaStreams streams = new KafkaStreams(builder, props);

        // This is for reset to work. Don't use in production - it causes the app to re-load the state from Kafka on every start
        streams.cleanUp();

        streams.start();

        // usually the stream application would be running forever,
        // in this example we just let it run for some time and stop since the input data is finite.
        Thread.sleep(5000L);

        streams.close();

    }
 
Example #29
Source File: RuntimeVerticleTest.java    From kiqr with Apache License 2.0
@Test
public void successfulStart(TestContext context){

    KafkaStreams streamsMock = mock(KafkaStreams.class);
    KStreamBuilder builderMock = mock(KStreamBuilder.class);
    Properties props = new Properties();
    RuntimeVerticle verticleSpy = spy(new RuntimeVerticle(builderMock, props, null));

    doReturn(streamsMock).when(verticleSpy).createAndStartStream();

    rule.vertx().deployVerticle(verticleSpy, context.asyncAssertSuccess(handler -> {


        context.assertTrue(rule.vertx().deploymentIDs().size() > 0);
    }));

}
 
Example #30
Source File: RuntimeVerticle.java    From kiqr with Apache License 2.0
protected RuntimeVerticle(KStreamBuilder builder, Properties props, KafkaStreams.StateListener listener) {
    this.builder = builder;
    this.props = props;
    this.listener = listener;
}