org.apache.kafka.streams.kstream.Joined Java Examples

The following examples show how to use org.apache.kafka.streams.kstream.Joined. Each example is drawn from an open-source project; the source file, project, and license are noted above the code.
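Joined tells a join operation which serdes to use for the key, the left (calling) side's value, and the right side's value, instead of falling back to the configured defaults. A minimal sketch of the common overload follows; the topic names and surrounding builder are illustrative, not taken from the examples below:

StreamsBuilder builder = new StreamsBuilder();
KStream<String, Long> clicks = builder.stream("clicks",
        Consumed.with(Serdes.String(), Serdes.Long()));
KTable<String, String> regions = builder.table("regions",
        Consumed.with(Serdes.String(), Serdes.String()));

// Joined.with(keySerde, leftValueSerde, rightValueSerde); passing null for an
// entry falls back to the default serde from the streams configuration.
KStream<String, String> joined = clicks.join(regions,
        (count, region) -> region + ":" + count,
        Joined.with(Serdes.String(), Serdes.Long(), null));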
Example #1
Source File: KGraph.java    From kafka-graphs with Apache License 2.0
public KGraph<K, VV, EV> subgraph(Predicate<K, VV> vertexFilter, Predicate<Edge<K>, EV> edgeFilter) {
    KTable<K, VV> filteredVertices = vertices.filter(vertexFilter);

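    // Keep only edges whose source and target both survive the vertex filter:
    // join on the source key, rekey by target, join again, then restore the edge key.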
    KTable<Edge<K>, EV> remainingEdges = edgesBySource()
        .join(filteredVertices, (e, v) -> e, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()))
        .map((k, edge) -> new KeyValue<>(edge.target(), edge))
        .join(filteredVertices, (e, v) -> e, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()))
        .map((k, edge) -> new KeyValue<>(new Edge<>(edge.source(), edge.target()), edge.value()))
        .groupByKey(Grouped.with(new KryoSerde<>(), edgeValueSerde()))
        .reduce((v1, v2) -> v2, Materialized.with(new KryoSerde<>(), edgeValueSerde()));

    KTable<Edge<K>, EV> filteredEdges = remainingEdges
        .filter(edgeFilter, Materialized.<Edge<K>, EV, KeyValueStore<Bytes, byte[]>>as(generateStoreName()).withKeySerde(new KryoSerde<>()).withValueSerde(edgeValueSerde()));

    return new KGraph<>(filteredVertices, filteredEdges, serialized);
}
 
Example #2
Source File: StreamToTableJoinIntegrationTests.java    From spring-cloud-stream-binder-kafka with Apache License 2.0
@StreamListener
@SendTo("output")
public KStream<String, Long> process(
		@Input("input") KStream<String, Long> userClicksStream,
		@Input("input-x") KTable<String, String> userRegionsTable) {

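	// Left-join each click with the user's region (default "UNKNOWN"), then sum clicks per region.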
	return userClicksStream
			.leftJoin(userRegionsTable,
					(clicks, region) -> new RegionWithClicks(
							region == null ? "UNKNOWN" : region, clicks),
					Joined.with(Serdes.String(), Serdes.Long(), null))
			.map((user, regionWithClicks) -> new KeyValue<>(
					regionWithClicks.getRegion(), regionWithClicks.getClicks()))
			.groupByKey(Serialized.with(Serdes.String(), Serdes.Long()))
			.reduce(Long::sum)
			.toStream();
}
 
Example #3
Source File: EventSourcedStreams.java    From simplesource with Apache License 2.0
static <K, C, E, A> Tuple2<KStream<K, CommandRequest<K, C>>, KStream<K, CommandResponse<K>>> getProcessedCommands(
        TopologyContext<K, C, E, A> ctx,
        final KStream<K, CommandRequest<K, C>> commandRequestStream,
        final KStream<K, CommandResponse<K>> commandResponseStream) {

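    // Reduce responses to the latest one per command id (highest sequence number wins).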
    final KTable<CommandId, CommandResponse<K>> commandResponseById = commandResponseStream
            .selectKey((key, response) -> response.commandId())
            .groupByKey(Serialized.with(ctx.serdes().commandId(), ctx.serdes().commandResponse()))
            .reduce((r1, r2) -> getResponseSequence(r1) > getResponseSequence(r2) ? r1 : r2);

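    // Pair each request with its response, if any, then rekey by the aggregate key.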
    final KStream<K, Tuple2<CommandRequest<K, C>, CommandResponse<K>>> reqResp = commandRequestStream
            .selectKey((k, v) -> v.commandId())
            .leftJoin(commandResponseById, Tuple2::new, Joined.with(ctx.serdes().commandId(), ctx.serdes().commandRequest(), ctx.serdes().commandResponse()))
            .selectKey((k, v) -> v.v1().aggregateKey());

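    // branches[0]: requests with no response yet; branches[1]: already-processed requests.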
    KStream<K, Tuple2<CommandRequest<K, C>, CommandResponse<K>>>[] branches =
            reqResp.branch((k, tuple) -> tuple.v2() == null, (k, tuple) -> tuple.v2() != null);

    KStream<K, CommandRequest<K, C>> unProcessed = branches[0].mapValues((k, tuple) -> tuple.v1());

    KStream<K, CommandResponse<K>> processed = branches[1].mapValues((k, tuple) -> tuple.v2())
            .peek((k, r) -> logger.info("Preprocessed: {}=CommandId:{}", k, r.commandId()));

    return new Tuple2<>(unProcessed, processed);
}
 
Example #4
Source File: KafkaStreamsTableJoin.java    From spring-cloud-stream-samples with Apache License 2.0
@Bean
public BiFunction<KStream<String, Long>, KTable<String, String>, KStream<String, Long>> process() {

	return (userClicksStream, userRegionsTable) -> userClicksStream
			.leftJoin(userRegionsTable,
					(clicks, region) -> new RegionWithClicks(region == null ? "UNKNOWN" : region, clicks),
					Joined.with(Serdes.String(), Serdes.Long(), null))
			.map((user, regionWithClicks) -> new KeyValue<>(regionWithClicks.getRegion(), regionWithClicks.getClicks()))
			.groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
			.reduce((firstClicks, secondClicks) -> firstClicks + secondClicks)
			.toStream();
}
 
Example #5
Source File: ErrorEventsPerMinute.java    From fluent-kafka-streams-tests with MIT License
public Topology getTopology() {
    final StreamsBuilder builder = new StreamsBuilder();

    // Click Events
    final KStream<Integer, ClickEvent> clickEvents = builder.stream(this.clickInputTopic,
            Consumed.with(Serdes.Integer(), new JsonSerde<>(ClickEvent.class)));

    final KTable<Windowed<Integer>, Long> counts = clickEvents
            .selectKey((key, value) -> value.getStatus())
            .filter((key, value) -> key >= 400)
            .groupByKey(Grouped.with(Serdes.Integer(), new JsonSerde<>(ClickEvent.class)))
            .windowedBy(TimeWindows.of(Duration.ofMinutes(1)))  // tumbling one-minute windows
            .count();

    // Status codes
    final KTable<Integer, StatusCode> statusCodes = builder.table(this.statusInputTopic,
            Consumed.with(Serdes.Integer(), new JsonSerde<>(StatusCode.class)));

    // Join
    final KStream<Integer, ErrorOutput> errors = counts.toStream()
            .map((key, value) -> KeyValue.pair(
                    key.key(),
                    new ErrorOutput(key.key(), value, key.window().start(), null /*empty definition*/)))
            .join(statusCodes,
                    (countRecord, code) -> new ErrorOutput(
                            countRecord.getStatusCode(), countRecord.getCount(), countRecord.getTime(), code.getDefinition()),
                    Joined.valueSerde(new JsonSerde<>(ErrorOutput.class)));
    errors.to(this.errorOutputTopic);

    // Send alert if more than 5x a certain error code per minute
    errors.filter((key, errorOutput) -> errorOutput.getCount() > 5L).to(this.alertTopic);

    return builder.build();
}
 
Example #6
Source File: StreamToTableJoinFunctionTests.java    From spring-cloud-stream-binder-kafka with Apache License 2.0
@Bean
public BiFunction<KStream<String, Long>, KTable<String, String>, KStream<String, Long>> process() {
	return (userClicksStream, userRegionsTable) -> (userClicksStream
			.leftJoin(userRegionsTable, (clicks, region) -> new RegionWithClicks(region == null ?
							"UNKNOWN" : region, clicks),
					Joined.with(Serdes.String(), Serdes.Long(), null))
			.map((user, regionWithClicks) -> new KeyValue<>(regionWithClicks.getRegion(),
					regionWithClicks.getClicks()))
			.groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
			.reduce(Long::sum)
			.toStream());
}
 
Example #7
Source File: StreamToTableJoinFunctionTests.java    From spring-cloud-stream-binder-kafka with Apache License 2.0
@Bean
public Function<KStream<String, Long>, Function<KTable<String, String>, KStream<String, Long>>> process() {
	return userClicksStream -> (userRegionsTable -> (userClicksStream
			.leftJoin(userRegionsTable, (clicks, region) -> new RegionWithClicks(region == null ?
							"UNKNOWN" : region, clicks),
					Joined.with(Serdes.String(), Serdes.Long(), null))
			.map((user, regionWithClicks) -> new KeyValue<>(regionWithClicks.getRegion(),
					regionWithClicks.getClicks()))
			.groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
			.reduce(Long::sum)
			.toStream()));
}
 
Example #8
Source File: StreamToTableJoinIntegrationTests.java    From spring-cloud-stream-binder-kafka with Apache License 2.0
@StreamListener
public void testProcessor(
		@Input(BindingsForTwoKStreamJoinTest.INPUT_1) KStream<String, String> input1Stream,
		@Input(BindingsForTwoKStreamJoinTest.INPUT_2) KStream<String, String> input2Stream) {
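	// Windowed stream-stream join over a five-minute window; the joiner returns null
	// and the result is unused, as this binding test only exercises the topology wiring.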
	input1Stream
			.join(input2Stream,
					(event1, event2) -> null,
					JoinWindows.of(TimeUnit.MINUTES.toMillis(5)),
					Joined.with(
							Serdes.String(),
							Serdes.String(),
							Serdes.String()
					)
			);
}
 
Example #9
Source File: KafkaStreamsPipeline.java    From quarkus with Apache License 2.0
@Produces
public Topology buildTopology() {
    StreamsBuilder builder = new StreamsBuilder();

    ObjectMapperSerde<Category> categorySerde = new ObjectMapperSerde<>(Category.class);
    ObjectMapperSerde<Customer> customerSerde = new ObjectMapperSerde<>(Customer.class);
    ObjectMapperSerde<EnrichedCustomer> enrichedCustomerSerde = new ObjectMapperSerde<>(EnrichedCustomer.class);

    KTable<Integer, Category> categories = builder.table(
            "streams-test-categories",
            Consumed.with(Serdes.Integer(), categorySerde));

    KStream<Integer, EnrichedCustomer> customers = builder
            .stream("streams-test-customers", Consumed.with(Serdes.Integer(), customerSerde))
            .selectKey((id, customer) -> customer.category)
            .join(
                    categories,
                    (customer, category) -> {
                        return new EnrichedCustomer(customer.id, customer.name, category);
                    },
                    Joined.with(Serdes.Integer(), customerSerde, categorySerde));

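    // Count enriched customers per category key, materialized in an in-memory store.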
    KeyValueBytesStoreSupplier storeSupplier = Stores.inMemoryKeyValueStore("countstore");
    customers.groupByKey()
            .count(Materialized.<Integer, Long> as(storeSupplier));

    customers.selectKey((categoryId, customer) -> customer.id)
            .to("streams-test-customers-processed", Produced.with(Serdes.Integer(), enrichedCustomerSerde));

    return builder.build();
}
 
Example #10
Source File: EventSourcedStreams.java    From simplesource with Apache License 2.0
static <K, C, E, A> KStream<K, CommandEvents<E, A>> getCommandEvents(
        TopologyContext<K, C, E, A> ctx,
        final KStream<K, CommandRequest<K, C>> commandRequestStream,
        final KTable<K, AggregateUpdate<A>> aggregateTable) {
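    // Join each command request with the current aggregate state to derive the resulting events.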
    return commandRequestStream.leftJoin(aggregateTable, (r, a) -> CommandRequestTransformer.getCommandEvents(ctx, a, r),
            Joined.with(ctx.serdes().aggregateKey(),
                    ctx.serdes().commandRequest(),
                    ctx.serdes().aggregateUpdate()));
}
 
Example #11
Source File: KGraph.java    From kafka-graphs with Apache License 2.0
public KGraph<K, VV, EV> filterOnVertices(Predicate<K, VV> vertexFilter) {
    KTable<K, VV> filteredVertices = vertices.filter(vertexFilter);

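    // Drop edges whose source or target vertex was filtered out, via two successive joins.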
    KTable<Edge<K>, EV> remainingEdges = edgesBySource()
        .join(filteredVertices, (e, v) -> e, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()))
        .map((k, edge) -> new KeyValue<>(edge.target(), edge))
        .join(filteredVertices, (e, v) -> e, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()))
        .map((k, edge) -> new KeyValue<>(new Edge<>(edge.source(), edge.target()), edge.value()))
        .groupByKey(Grouped.with(new KryoSerde<>(), edgeValueSerde()))
        .reduce((v1, v2) -> v2, Materialized.<Edge<K>, EV, KeyValueStore<Bytes, byte[]>>as(generateStoreName()).withKeySerde(new KryoSerde<>()).withValueSerde(edgeValueSerde()));

    return new KGraph<>(filteredVertices, remainingEdges, serialized);
}
 
Example #12
Source File: KGraph.java    From kafka-graphs with Apache License 2.0
public KTable<K, VV> reduceOnNeighbors(Reducer<VV> reducer,
                                       EdgeDirection direction) throws IllegalArgumentException {
    switch (direction) {
        case IN:
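            // Pair each edge with its source-vertex value, group those values by edge target,
            // then reduce the collected neighbor values per target vertex.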
            KStream<K, Tuple2<EdgeWithValue<K, EV>, VV>> edgesWithSources =
                edgesBySource()
                    .join(vertices, Tuple2::new, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()));
            KTable<K, Map<EdgeWithValue<K, EV>, VV>> neighborsGroupedByTarget = edgesWithSources
                .map(new MapNeighbors(EdgeWithValue::target))
                .groupByKey(Grouped.with(keySerde(), new KryoSerde<>()))
                .aggregate(
                    HashMap::new,
                    (aggKey, value, aggregate) -> {
                        aggregate.put(value._1, value._2);
                        return aggregate;
                    },
                    Materialized.with(keySerde(), new KryoSerde<>()));
            KTable<K, VV> neighborsReducedByTarget = neighborsGroupedByTarget
                .mapValues(v -> v.values().stream().reduce(reducer::apply).orElse(null),
                    Materialized.<K, VV, KeyValueStore<Bytes, byte[]>>as(generateStoreName())
                        .withKeySerde(keySerde()).withValueSerde(vertexValueSerde()));
            return neighborsReducedByTarget;
        case OUT:
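            // Pair each edge with its target-vertex value, group those values by edge source,
            // then reduce the collected neighbor values per source vertex.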
            KStream<K, Tuple2<EdgeWithValue<K, EV>, VV>> edgesWithTargets =
                edgesByTarget()
                    .join(vertices, Tuple2::new, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()));
            KTable<K, Map<EdgeWithValue<K, EV>, VV>> neighborsGroupedBySource = edgesWithTargets
                .map(new MapNeighbors(EdgeWithValue::source))
                .groupByKey(Grouped.with(keySerde(), new KryoSerde<>()))
                .aggregate(
                    HashMap::new,
                    (aggKey, value, aggregate) -> {
                        aggregate.put(value._1, value._2);
                        return aggregate;
                    },
                    Materialized.with(keySerde(), new KryoSerde<>()));
            KTable<K, VV> neighborsReducedBySource = neighborsGroupedBySource
                .mapValues(v -> v.values().stream().reduce(reducer::apply).orElse(null),
                    Materialized.<K, VV, KeyValueStore<Bytes, byte[]>>as(generateStoreName())
                        .withKeySerde(keySerde()).withValueSerde(vertexValueSerde()));
            return neighborsReducedBySource;
        case BOTH:
            throw new UnsupportedOperationException();
        default:
            throw new IllegalArgumentException("Illegal edge direction");
    }
}
 
Example #13
Source File: KGraph.java    From kafka-graphs with Apache License 2.0
public <T> KTable<K, T> groupReduceOnNeighbors(NeighborsFunctionWithVertexValue<K, VV, EV, T> neighborsFunction,
                                               EdgeDirection direction) throws IllegalArgumentException {
    switch (direction) {
        case IN:
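            // Collect each vertex's incoming neighbors (edge plus source-vertex value) into a map,
            // then apply the neighbors function via a left join against the vertices table.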
            KStream<K, Tuple2<EdgeWithValue<K, EV>, VV>> edgesWithSources =
                edgesBySource()
                    .join(vertices, Tuple2::new, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()));
            KTable<K, Map<EdgeWithValue<K, EV>, VV>> neighborsGroupedByTarget = edgesWithSources
                .map(new MapNeighbors(EdgeWithValue::target))
                .groupByKey(Grouped.with(keySerde(), new KryoSerde<>()))
                .aggregate(
                    HashMap::new,
                    (aggKey, value, aggregate) -> {
                        aggregate.put(value._1, value._2);
                        return aggregate;
                    },
                    Materialized.with(keySerde(), new KryoSerde<>()));
            return vertices()
                .leftJoin(neighborsGroupedByTarget,
                    new ApplyNeighborLeftJoinFunction<>(neighborsFunction), Materialized.with(keySerde(), new KryoSerde<>()));
        case OUT:
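            // Collect each vertex's outgoing neighbors (edge plus target-vertex value) into a map,
            // then apply the neighbors function via a left join against the vertices table.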
            KStream<K, Tuple2<EdgeWithValue<K, EV>, VV>> edgesWithTargets =
                edgesByTarget()
                    .join(vertices, Tuple2::new, Joined.with(keySerde(), new KryoSerde<>(), vertexValueSerde()));
            KTable<K, Map<EdgeWithValue<K, EV>, VV>> neighborsGroupedBySource = edgesWithTargets
                .map(new MapNeighbors(EdgeWithValue::source))
                .groupByKey(Grouped.with(keySerde(), new KryoSerde<>()))
                .aggregate(
                    HashMap::new,
                    (aggKey, value, aggregate) -> {
                        aggregate.put(value._1, value._2);
                        return aggregate;
                    },
                    Materialized.with(keySerde(), new KryoSerde<>()));
            return vertices()
                .leftJoin(neighborsGroupedBySource,
                    new ApplyNeighborLeftJoinFunction<>(neighborsFunction), Materialized.with(keySerde(), new KryoSerde<>()));
        case BOTH:
            throw new UnsupportedOperationException();
        default:
            throw new IllegalArgumentException("Illegal edge direction");
    }
}
 
Example #14
Source File: KafkaStreamsJoinsApp.java    From kafka-streams-in-action with Apache License 2.0
public static void main(String[] args) throws Exception {

        StreamsConfig streamsConfig = new StreamsConfig(getProperties());
        StreamsBuilder builder = new StreamsBuilder();


        Serde<Purchase> purchaseSerde = StreamsSerdes.PurchaseSerde();
        Serde<String> stringSerde = Serdes.String();

        KeyValueMapper<String, Purchase, KeyValue<String,Purchase>> custIdCCMasking = (k, v) -> {
            Purchase masked = Purchase.builder(v).maskCreditCard().build();
            return new KeyValue<>(masked.getCustomerId(), masked);
        };


        Predicate<String, Purchase> coffeePurchase = (key, purchase) -> purchase.getDepartment().equalsIgnoreCase("coffee");
        Predicate<String, Purchase> electronicPurchase = (key, purchase) -> purchase.getDepartment().equalsIgnoreCase("electronics");

        int COFFEE_PURCHASE = 0;
        int ELECTRONICS_PURCHASE = 1;

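        // Mask credit-card numbers, rekey by customer id, and split the stream into
        // coffee and electronics branches before joining them on customer id.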
        KStream<String, Purchase> transactionStream = builder.stream( "transactions", Consumed.with(Serdes.String(), purchaseSerde)).map(custIdCCMasking);

        KStream<String, Purchase>[] branchesStream = transactionStream.selectKey((k,v)-> v.getCustomerId()).branch(coffeePurchase, electronicPurchase);

        KStream<String, Purchase> coffeeStream = branchesStream[COFFEE_PURCHASE];
        KStream<String, Purchase> electronicsStream = branchesStream[ELECTRONICS_PURCHASE];

        ValueJoiner<Purchase, Purchase, CorrelatedPurchase> purchaseJoiner = new PurchaseJoiner();
        JoinWindows twentyMinuteWindow =  JoinWindows.of(60 * 1000 * 20);

        KStream<String, CorrelatedPurchase> joinedKStream = coffeeStream.join(electronicsStream,
                                                                              purchaseJoiner,
                                                                              twentyMinuteWindow,
                                                                              Joined.with(stringSerde,
                                                                                          purchaseSerde,
                                                                                          purchaseSerde));

        joinedKStream.print(Printed.<String, CorrelatedPurchase>toSysOut().withLabel("joined KStream"));

        // used only to produce data for this application, not typical usage
        MockDataProducer.producePurchaseData();
        
        LOG.info("Starting Join Examples");
        KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), streamsConfig);
        kafkaStreams.start();
        Thread.sleep(65000);
        LOG.info("Shutting down the Join Examples now");
        kafkaStreams.close();
        MockDataProducer.shutdown();


    }
 
Example #15
Source File: CountingWindowingAndKtableJoinExample.java    From kafka-streams-in-action with Apache License 2.0
public static void main(String[] args) throws Exception {


        StreamsConfig streamsConfig = new StreamsConfig(getProperties());

        Serde<String> stringSerde = Serdes.String();
        Serde<StockTransaction> transactionSerde = StreamsSerdes.StockTransactionSerde();
        Serde<TransactionSummary> transactionKeySerde = StreamsSerdes.TransactionSummarySerde();

        StreamsBuilder builder = new StreamsBuilder();
        long twentySeconds = 1000 * 20;
        long fifteenMinutes = 1000 * 60 * 15;
        long fiveSeconds = 1000 * 5;
        KTable<Windowed<TransactionSummary>, Long> customerTransactionCounts =
                 builder.stream(STOCK_TRANSACTIONS_TOPIC, Consumed.with(stringSerde, transactionSerde).withOffsetResetPolicy(LATEST))
                .groupBy((noKey, transaction) -> TransactionSummary.from(transaction),
                        Serialized.with(transactionKeySerde, transactionSerde))
                 // session window; comment out the line below and uncomment one of the variants further down for a different window example
                .windowedBy(SessionWindows.with(twentySeconds).until(fifteenMinutes)).count();

                //The following are alternative window examples:

                //Tumbling window with 15-minute retention
                //.windowedBy(TimeWindows.of(twentySeconds).until(fifteenMinutes)).count();

                //Tumbling window with the default 24-hour retention
                //.windowedBy(TimeWindows.of(twentySeconds)).count();

                //Hopping window advancing every five seconds, with 15-minute retention
                //.windowedBy(TimeWindows.of(twentySeconds).advanceBy(fiveSeconds).until(fifteenMinutes)).count();

        customerTransactionCounts.toStream().print(Printed.<Windowed<TransactionSummary>, Long>toSysOut().withLabel("Customer Transactions Counts"));

        KStream<String, TransactionSummary> countStream = customerTransactionCounts.toStream().map((window, count) -> {
                      TransactionSummary transactionSummary = window.key();
                      String newKey = transactionSummary.getIndustry();
                      transactionSummary.setSummaryCount(count);
                      return KeyValue.pair(newKey, transactionSummary);
        });

        KTable<String, String> financialNews = builder.table( "financial-news", Consumed.with(EARLIEST));


        ValueJoiner<TransactionSummary, String, String> valueJoiner = (txnct, news) ->
                String.format("%d shares purchased %s related news [%s]", txnct.getSummaryCount(), txnct.getStockTicker(), news);

        KStream<String,String> joined = countStream.leftJoin(financialNews, valueJoiner, Joined.with(stringSerde, transactionKeySerde, stringSerde));

        joined.print(Printed.<String, String>toSysOut().withLabel("Transactions and News"));



        KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), streamsConfig);
        kafkaStreams.cleanUp();
        
        kafkaStreams.setUncaughtExceptionHandler((t, e) -> {
            LOG.error("had exception ", e);
        });
        CustomDateGenerator dateGenerator = CustomDateGenerator.withTimestampsIncreasingBy(Duration.ofMillis(750));
        
        DataGenerator.setTimestampGenerator(dateGenerator::get);
        
        MockDataProducer.produceStockTransactions(2, 5, 3, false);

        LOG.info("Starting CountingWindowing and KTableJoins Example");
        kafkaStreams.cleanUp();
        kafkaStreams.start();
        Thread.sleep(65000);
        LOG.info("Shutting down the CountingWindowing and KTableJoins Example Application now");
        kafkaStreams.close();
        MockDataProducer.shutdown();
    }
 
Example #16
Source File: KafkaStreamsInteractiveQuerySample.java    From spring-cloud-stream-samples with Apache License 2.0
@Bean
public BiConsumer<KStream<String, PlayEvent>, KTable<Long, Song>> process() {

	return (s, t) -> {
		// create and configure the SpecificAvroSerdes required in this example
		final Map<String, String> serdeConfig = Collections.singletonMap(
				AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, "http://localhost:8081");

		final SpecificAvroSerde<PlayEvent> playEventSerde = new SpecificAvroSerde<>();
		playEventSerde.configure(serdeConfig, false);

		final SpecificAvroSerde<Song> keySongSerde = new SpecificAvroSerde<>();
		keySongSerde.configure(serdeConfig, true);

		final SpecificAvroSerde<Song> valueSongSerde = new SpecificAvroSerde<>();
		valueSongSerde.configure(serdeConfig, false);

		final SpecificAvroSerde<SongPlayCount> songPlayCountSerde = new SpecificAvroSerde<>();
		songPlayCountSerde.configure(serdeConfig, false);

		// Accept play events that have a duration >= the minimum
		final KStream<Long, PlayEvent> playsBySongId =
				s.filter((region, event) -> event.getDuration() >= MIN_CHARTABLE_DURATION)
						// repartition based on song id
						.map((key, value) -> KeyValue.pair(value.getSongId(), value));

		// join the plays with song as we will use it later for charting
		final KStream<Long, Song> songPlays = playsBySongId.leftJoin(t,
				(value1, song) -> song,
				Joined.with(Serdes.Long(), playEventSerde, valueSongSerde));

		// create a state store to track song play counts
		final KTable<Song, Long> songPlayCounts = songPlays.groupBy((songId, song) -> song,
				Serialized.with(keySongSerde, valueSongSerde))
				.count(Materialized.<Song, Long, KeyValueStore<Bytes, byte[]>>as(SONG_PLAY_COUNT_STORE)
						.withKeySerde(valueSongSerde)
						.withValueSerde(Serdes.Long()));

		final TopFiveSerde topFiveSerde = new TopFiveSerde();

		// Compute the top five charts for each genre. The results of this computation will continuously update the state
		// store "top-five-songs-by-genre", and this state store can then be queried interactively via a REST API (cf.
		// MusicPlaysRestService) for the latest charts per genre.
		songPlayCounts.groupBy((song, plays) ->
						KeyValue.pair(song.getGenre().toLowerCase(),
								new SongPlayCount(song.getId(), plays)),
				Serialized.with(Serdes.String(), songPlayCountSerde))
				// aggregate into a TopFiveSongs instance that will keep track
				// of the current top five for each genre. The data will be available in the
				// top-five-songs-genre store
				.aggregate(TopFiveSongs::new,
						(aggKey, value, aggregate) -> {
							aggregate.add(value);
							return aggregate;
						},
						(aggKey, value, aggregate) -> {
							aggregate.remove(value);
							return aggregate;
						},
						Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_BY_GENRE_STORE)
								.withKeySerde(Serdes.String())
								.withValueSerde(topFiveSerde)
				);

		// Compute the overall top five chart. The results of this computation will continuously update the state
		// store "top-five-songs", and this state store can then be queried interactively via a REST API (cf.
		// MusicPlaysRestService) for the latest overall chart.
		songPlayCounts.groupBy((song, plays) ->
						KeyValue.pair(TOP_FIVE_KEY,
								new SongPlayCount(song.getId(), plays)),
				Serialized.with(Serdes.String(), songPlayCountSerde))
				.aggregate(TopFiveSongs::new,
						(aggKey, value, aggregate) -> {
							aggregate.add(value);
							return aggregate;
						},
						(aggKey, value, aggregate) -> {
							aggregate.remove(value);
							return aggregate;
						},
						Materialized.<String, TopFiveSongs, KeyValueStore<Bytes, byte[]>>as(TOP_FIVE_SONGS_STORE)
								.withKeySerde(Serdes.String())
								.withValueSerde(topFiveSerde)
				);
	};

}
 
Example #17
Source File: SchemaKStream.java    From ksql-fork-with-deep-learning-function with Apache License 2.0
@SuppressWarnings("unchecked")
public SchemaKStream leftJoin(
    final SchemaKTable schemaKTable,
    final Schema joinSchema,
    final Field joinKey,
    KsqlTopicSerDe joinSerDe,
    KsqlConfig ksqlConfig
) {

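  // Left join the stream with the table; when the table side has no match, pad the
  // right-hand columns with nulls so the joined row keeps the full join schema width.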
  KStream joinedKStream =
      kstream.leftJoin(
          schemaKTable.getKtable(),
          (ValueJoiner<GenericRow, GenericRow, GenericRow>) (leftGenericRow, rightGenericRow) -> {
            List<Object> columns = new ArrayList<>(leftGenericRow.getColumns());
            if (rightGenericRow == null) {
              for (int i = leftGenericRow.getColumns().size();
                   i < joinSchema.fields().size(); i++) {
                columns.add(null);
              }
            } else {
              columns.addAll(rightGenericRow.getColumns());
            }

            return new GenericRow(columns);
          },
          Joined.with(Serdes.String(),
                      joinSerDe.getGenericRowSerde(this.getSchema(),
                                                   ksqlConfig, false, schemaRegistryClient
                      ), null
          )
      );

  return new SchemaKStream(
      joinSchema,
      joinedKStream,
      joinKey,
      Arrays.asList(this, schemaKTable),
      Type.JOIN,
      functionRegistry,
      schemaRegistryClient
  );
}