Java Code Examples for org.apache.samza.application.descriptors.StreamApplicationDescriptor#getInputStream()

The following examples show how to use org.apache.samza.application.descriptors.StreamApplicationDescriptor#getInputStream() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RepartitionExample.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));

  KafkaOutputDescriptor<KV<String, MyStreamOutput>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(MyStreamOutput.class)));

  appDescriptor.withDefaultSystem(trackingSystem);
  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, MyStreamOutput>> pageViewEventPerMember = appDescriptor.getOutputStream(outputStreamDescriptor);

  pageViewEvents
      .partitionBy(pve -> pve.getMemberId(), pve -> pve,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)), "partitionBy")
      .window(Windows.keyedTumblingWindow(
          KV::getKey, Duration.ofMinutes(5), () -> 0, (m, c) -> c + 1, null, null), "window")
      .map(windowPane -> KV.of(windowPane.getKey().getKey(), new MyStreamOutput(windowPane)))
      .sendTo(pageViewEventPerMember);
}
 
Example 2
Source File: PageViewCounterExample.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));

  KafkaOutputDescriptor<KV<String, PageViewCount>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, PageViewCount>> pageViewEventPerMemberStream = appDescriptor.getOutputStream(outputStreamDescriptor);

  SupplierFunction<Integer> initialValue = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> foldLeftFn = (m, c) -> c + 1;
  pageViewEvents
      .window(Windows.keyedTumblingWindow(PageViewEvent::getMemberId, Duration.ofSeconds(10), initialValue, foldLeftFn, null, null)
          .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
          .setAccumulationMode(AccumulationMode.DISCARDING), "tumblingWindow")
      .map(windowPane -> KV.of(windowPane.getKey().getKey(), buildPageViewCount(windowPane)))
      .sendTo(pageViewEventPerMemberStream);
}
 
Example 3
Source File: WindowExample.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<Integer> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde());

  SupplierFunction<Integer> initialValue = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> counter = (m, c) -> c == null ? 1 : c + 1;
  MessageStream<PageViewEvent> inputStream = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<Integer> outputStream = appDescriptor.getOutputStream(outputStreamDescriptor);

  // create a tumbling window that outputs the number of message collected every 10 minutes.
  // also emit early results if either the number of messages collected reaches 30000, or if no new messages arrive
  // for 1 minute.
  inputStream
      .window(Windows.tumblingWindow(Duration.ofMinutes(10), initialValue, counter, new IntegerSerde())
          .setLateTrigger(Triggers.any(Triggers.count(30000),
              Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))), "window")
      .map(WindowPane::getMessage)
      .sendTo(outputStream);
}
 
Example 4
Source File: BroadcastExample.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> serde = KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
  KafkaInputDescriptor<KV<String, PageViewEvent>> pageViewEvent =
      trackingSystem.getInputDescriptor("pageViewEvent", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream1 =
      trackingSystem.getOutputDescriptor("outStream1", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream2 =
      trackingSystem.getOutputDescriptor("outStream2", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream3 =
      trackingSystem.getOutputDescriptor("outStream3", serde);

  MessageStream<KV<String, PageViewEvent>> inputStream = appDescriptor.getInputStream(pageViewEvent);
  inputStream.filter(m -> m.key.equals("key1")).sendTo(appDescriptor.getOutputStream(outStream1));
  inputStream.filter(m -> m.key.equals("key2")).sendTo(appDescriptor.getOutputStream(outStream2));
  inputStream.filter(m -> m.key.equals("key3")).sendTo(appDescriptor.getOutputStream(outStream3));
}
 
Example 5
Source File: AsyncApplicationExample.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<AdClickEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("adClickEvent", new JsonSerdeV2<>(AdClickEvent.class));

  KafkaOutputDescriptor<KV<String, EnrichedAdClickEvent>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("enrichedAdClickEvent",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(EnrichedAdClickEvent.class)));

  MessageStream<AdClickEvent> adClickEventStream = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, EnrichedAdClickEvent>> enrichedAdClickStream =
      appDescriptor.getOutputStream(outputStreamDescriptor);

  adClickEventStream
      .flatMapAsync(AsyncApplicationExample::enrichAdClickEvent)
      .map(enrichedAdClickEvent -> KV.of(enrichedAdClickEvent.getCountry(), enrichedAdClickEvent))
      .sendTo(enrichedAdClickStream);
}
 
Example 6
Source File: SessionWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
  KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
  OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);

  pageViews
      .filter(m -> !FILTER_KEY.equals(m.getUserId()))
      .window(Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "sessionWindow")
      .map(m -> KV.of(m.getKey().getKey(), m.getMessage().size()))
      .sendTo(outputStream);
}
 
Example 7
Source File: TumblingWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
  KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
  OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);

  pageViews
      .filter(m -> !FILTER_KEY.equals(m.getUserId()))
      .window(Windows.keyedTumblingWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "tumblingWindow")
      .map(m -> KV.of(m.getKey().getKey(), m.getMessage().size()))
      .sendTo(outputStream);

}
 
Example 8
Source File: FilterExample.java    From samza-hello-samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KVSerde<String, PageView> serde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<KV<String, PageView>> inputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, serde);
  KafkaOutputDescriptor<KV<String, PageView>> outputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, serde);

  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  MessageStream<KV<String, PageView>> pageViews = appDescriptor.getInputStream(inputDescriptor);
  OutputStream<KV<String, PageView>> filteredPageViews = appDescriptor.getOutputStream(outputDescriptor);

  pageViews
      .filter(kv -> !INVALID_USER_ID.equals(kv.value.userId))
      .sendTo(filteredPageViews);
}
 
Example 9
Source File: TestSchedulingApp.java    From samza with Apache License 2.0 6 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  final JsonSerdeV2<PageView> serde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("kafka");
  KafkaInputDescriptor<PageView> isd = ksd.getInputDescriptor(PAGE_VIEWS, serde);
  final MessageStream<PageView> pageViews = appDescriptor.getInputStream(isd);
  final MessageStream<PageView> output = pageViews.flatMap(new FlatmapScheduledFn());

  MessageStreamAssert.that("Output from scheduling function should container all complete messages", output, serde)
      .containsInAnyOrder(
          Arrays.asList(
              new PageView("v1-complete", "p1", "u1"),
              new PageView("v2-complete", "p2", "u1"),
              new PageView("v3-complete", "p1", "u2"),
              new PageView("v4-complete", "p3", "u2")
          ));
}
 
Example 10
Source File: StreamTableJoinExample.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> joinResultSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profileInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> joinResultOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_TOPIC, joinResultSerde);

  RocksDbTableDescriptor<String, Profile> profileTableDescriptor =
      new RocksDbTableDescriptor<String, Profile>("profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  MessageStream<Profile> profileStream = appDescriptor.getInputStream(profileInputDescriptor);
  MessageStream<PageView> pageViewStream = appDescriptor.getInputStream(pageViewInputDescriptor);
  OutputStream<EnrichedPageView> joinResultStream = appDescriptor.getOutputStream(joinResultOutputDescriptor);
  Table<KV<String, Profile>> profileTable = appDescriptor.getTable(profileTableDescriptor);

  profileStream
      .map(profile -> KV.of(profile.userId, profile))
      .sendTo(profileTable);

  pageViewStream
      .partitionBy(pv -> pv.userId, pv -> pv, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profileTable, new JoinFn())
      .sendTo(joinResultStream);
}
 
Example 11
Source File: AzureApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Define your system here
  EventHubsSystemDescriptor systemDescriptor = new EventHubsSystemDescriptor("eventhubs");

  // Choose your serializer/deserializer for the EventData payload
  StringSerde serde = new StringSerde();

  // Define the input and output descriptors with respective configs
  EventHubsInputDescriptor<KV<String, String>> inputDescriptor =
      systemDescriptor.getInputDescriptor(INPUT_STREAM_ID, EVENTHUBS_NAMESPACE, EVENTHUBS_INPUT_ENTITY, serde)
          .withSasKeyName(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_NAME_CONFIG))
          .withSasKey(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_TOKEN_CONFIG));

  EventHubsOutputDescriptor<KV<String, String>> outputDescriptor =
      systemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, EVENTHUBS_NAMESPACE, EVENTHUBS_OUTPUT_ENTITY, serde)
          .withSasKeyName(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_NAME_CONFIG))
          .withSasKey(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_TOKEN_CONFIG));

  // Define the input and output streams with descriptors
  MessageStream<KV<String, String>> eventhubInput = appDescriptor.getInputStream(inputDescriptor);
  OutputStream<KV<String, String>> eventhubOutput = appDescriptor.getOutputStream(outputDescriptor);

  // Define the execution flow with the high-level API
  eventhubInput
      .map((message) -> {
        System.out.println("Sending: ");
        System.out.println("Received Key: " + message.getKey());
        System.out.println("Received Message: " + message.getValue());
        return message;
      })
      .sendTo(eventhubOutput);
}
 
Example 12
Source File: RemoteTableJoinExample.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> stockSymbolInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());
  KafkaOutputDescriptor<StockPrice> stockPriceOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, new JsonSerdeV2<>(StockPrice.class));
  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);
  MessageStream<String> stockSymbolStream = appDescriptor.getInputStream(stockSymbolInputDescriptor);
  OutputStream<StockPrice> stockPriceStream = appDescriptor.getOutputStream(stockPriceOutputDescriptor);

  RemoteTableDescriptor<String, Double> remoteTableDescriptor =
      new RemoteTableDescriptor("remote-table")
          .withReadRateLimit(10)
          .withReadFunction(new StockPriceReadFunction());
  CachingTableDescriptor<String, Double> cachedRemoteTableDescriptor =
      new CachingTableDescriptor<>("cached-remote-table", remoteTableDescriptor)
          .withReadTtl(Duration.ofSeconds(5));
  Table<KV<String, Double>> cachedRemoteTable = appDescriptor.getTable(cachedRemoteTableDescriptor);

  stockSymbolStream
      .map(symbol -> new KV<String, Void>(symbol, null))
      .join(cachedRemoteTable, new JoinFn())
      .sendTo(stockPriceStream);

}
 
Example 13
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0 5 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, PageView>> isd =
      ksd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
  MessageStream<KV<String, TestData.PageView>> inputStream = appDescriptor.getInputStream(isd);
  inputStream
      .map(KV::getValue)
      .partitionBy(PageView::getMemberId, pv -> pv, KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(PageView.class)), "p1")
      .sink((m, collector, coordinator) ->
          collector.send(new OutgoingMessageEnvelope(new SystemStream("test", "Output"), m.getKey(), m.getKey(), m)));
}
 
Example 14
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0 5 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, PageView>> isd =
      ksd.getInputDescriptor("PageView", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));
  MessageStream<KV<String, TestData.PageView>> inputStream = appDescriptor.getInputStream(isd);
  inputStream.map(KV::getValue).filter(pv -> pv.getPageKey().equals("inbox"));
}
 
Example 15
Source File: TestStreamApplication.java    From samza with Apache License 2.0 5 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(systemName);
  KafkaOutputDescriptor<String> osd = ksd.getOutputDescriptor(outputTopic, new StringSerde());
  OutputStream<String> outputStream = appDescriptor.getOutputStream(osd);

  for (String inputTopic : inputTopics) {
    KafkaInputDescriptor<String> isd = ksd.getInputDescriptor(inputTopic, new NoOpSerde<>());
    MessageStream<String> inputStream = appDescriptor.getInputStream(isd);
    inputStream.map(new TestMapFunction(appName, processorName)).sendTo(outputStream);
  }
}
 
Example 16
Source File: AzureBlobApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Define a system descriptor for Kafka
  KafkaSystemDescriptor kafkaSystemDescriptor =
      new KafkaSystemDescriptor("kafka").withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<PageView> pageViewInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_PAGEVIEW_STREAM_ID, new JsonSerdeV2<>(PageView.class));

  // Define a system descriptor for Azure Blob Storage
  GenericSystemDescriptor azureBlobSystemDescriptor =
      new GenericSystemDescriptor(OUTPUT_SYSTEM, "org.apache.samza.system.azureblob.AzureBlobSystemFactory");

  GenericOutputDescriptor<PageViewAvroRecord> azureBlobOuputDescriptor =
      azureBlobSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM, new NoOpSerde<>());

  // Set Kafka as the default system for the job
  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  // Define the input and output streams with descriptors
  MessageStream<PageView> pageViewInput = appDescriptor.getInputStream(pageViewInputDescriptor);
  OutputStream<PageViewAvroRecord> pageViewAvroRecordOutputStream = appDescriptor.getOutputStream(azureBlobOuputDescriptor);

  // Define the execution flow with the high-level API
  pageViewInput
      .map((message) -> {
        LOG.info("Sending: Received PageViewEvent with pageId: " + message.pageId);
        return PageViewAvroRecord.buildPageViewRecord(message);
      })
      .sendTo(pageViewAvroRecordOutputStream);
}
 
Example 17
Source File: TestLocalTableEndToEnd.java    From samza with Apache License 2.0 4 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDesc) {
  KVSerde<Integer, Profile> profileKVSerde = KVSerde.of(new IntegerSerde(), new ProfileJsonSerde());
  KVSerde<Integer, PageView> pageViewKVSerde = KVSerde.of(new IntegerSerde(), new PageViewJsonSerde());

  PageViewToProfileJoinFunction joinFn1 = new PageViewToProfileJoinFunction();
  PageViewToProfileJoinFunction joinFn2 = new PageViewToProfileJoinFunction();

  Table<KV<Integer, Profile>> profileTable = appDesc.getTable(new InMemoryTableDescriptor("t1", profileKVSerde));

  DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test");
  GenericInputDescriptor<Profile> profileISD1 = ksd.getInputDescriptor("Profile1", new NoOpSerde<>());
  GenericInputDescriptor<Profile> profileISD2 = ksd.getInputDescriptor("Profile2", new NoOpSerde<>());
  MessageStream<Profile> profileStream1 = appDesc.getInputStream(profileISD1);
  MessageStream<Profile> profileStream2 = appDesc.getInputStream(profileISD2);

  profileStream1
      .map(m -> {
        sentToProfileTable1.add(m);
        return new KV(m.getMemberId(), m);
      })
      .sendTo(profileTable);
  profileStream2
      .map(m -> {
        sentToProfileTable2.add(m);
        return new KV(m.getMemberId(), m);
      })
      .sendTo(profileTable);

  GenericInputDescriptor<PageView> pageViewISD1 = ksd.getInputDescriptor("PageView1", new NoOpSerde<PageView>());
  GenericInputDescriptor<PageView> pageViewISD2 = ksd.getInputDescriptor("PageView2", new NoOpSerde<PageView>());
  MessageStream<PageView> pageViewStream1 = appDesc.getInputStream(pageViewISD1);
  MessageStream<PageView> pageViewStream2 = appDesc.getInputStream(pageViewISD2);

  pageViewStream1
      .partitionBy(PageView::getMemberId, v -> v, pageViewKVSerde, "p1")
      .join(profileTable, joinFn1)
      .sink((m, collector, coordinator) -> joinedPageViews1.add(m));

  pageViewStream2
      .partitionBy(PageView::getMemberId, v -> v, pageViewKVSerde, "p2")
      .join(profileTable, joinFn2)
      .sink((m, collector, coordinator) -> joinedPageViews2.add(m));
}
 
Example 18
Source File: WikipediaApplication.java    From samza-hello-samza with Apache License 2.0 4 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {

  Duration windowDuration =
      appDescriptor.getConfig().containsKey("deploy.test") ? Duration.ofMillis(10) : Duration.ofSeconds(10);
  // Define a SystemDescriptor for Wikipedia data
  WikipediaSystemDescriptor wikipediaSystemDescriptor = new WikipediaSystemDescriptor("irc.wikimedia.org", 6667);

  // Define InputDescriptors for consuming wikipedia data
  WikipediaInputDescriptor wikipediaInputDescriptor = wikipediaSystemDescriptor
      .getInputDescriptor("en-wikipedia")
      .withChannel(WIKIPEDIA_CHANNEL);
  WikipediaInputDescriptor wiktionaryInputDescriptor = wikipediaSystemDescriptor
      .getInputDescriptor("en-wiktionary")
      .withChannel(WIKTIONARY_CHANNEL);
  WikipediaInputDescriptor wikiNewsInputDescriptor = wikipediaSystemDescriptor
      .getInputDescriptor("en-wikinews")
      .withChannel(WIKINEWS_CHANNEL);

  // Define a system descriptor for Kafka
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Define an output descriptor
  KafkaOutputDescriptor<WikipediaStatsOutput> statsOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor("wikipedia-stats", new JsonSerdeV2<>(WikipediaStatsOutput.class));


  // Set the default system descriptor to Kafka, so that it is used for all
  // internal resources, e.g., kafka topic for checkpointing, coordinator stream.
  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);
  MessageStream<WikipediaFeedEvent> wikipediaEvents = appDescriptor.getInputStream(wikipediaInputDescriptor);
  MessageStream<WikipediaFeedEvent> wiktionaryEvents = appDescriptor.getInputStream(wiktionaryInputDescriptor);
  MessageStream<WikipediaFeedEvent> wikiNewsEvents = appDescriptor.getInputStream(wikiNewsInputDescriptor);
  OutputStream<WikipediaStatsOutput> wikipediaStats = appDescriptor.getOutputStream(statsOutputDescriptor);

  // Merge inputs
  MessageStream<WikipediaFeedEvent> allWikipediaEvents =
      MessageStream.mergeAll(ImmutableList.of(wikipediaEvents, wiktionaryEvents, wikiNewsEvents));


  // Parse, update stats, prepare output, and send
  allWikipediaEvents
      .map(WikipediaParser::parseEvent)
      .window(Windows.tumblingWindow(windowDuration,
          WikipediaStats::new, new WikipediaStatsAggregator(), WikipediaStats.serde()), "statsWindow")
      .map(this::formatOutput)
      .sendTo(wikipediaStats);
}
 
Example 19
Source File: CouchbaseTableExample.java    From samza-hello-samza with Apache License 2.0 4 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor app) {

  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> wordInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());

  KafkaOutputDescriptor<String> countOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, new StringSerde());

  MyCouchbaseTableWriteFunction writeFn = new MyCouchbaseTableWriteFunction(BUCKET_NAME, CLUSTER_NODES)
      .withBootstrapCarrierDirectPort(COUCHBASE_PORT)
      .withUsernameAndPassword(BUCKET_NAME, BUCKET_PASSWORD)
      .withTimeout(Duration.ofSeconds(5));

  TableRetryPolicy retryPolicy = new TableRetryPolicy()
      .withFixedBackoff(Duration.ofSeconds(1))
      .withStopAfterAttempts(3);

  RemoteTableDescriptor couchbaseTableDescriptor = new RemoteTableDescriptor("couchbase-table")
      .withReadFunction(new NoOpTableReadFunction())
      .withReadRateLimiterDisabled()
      .withWriteFunction(writeFn)
      .withWriteRetryPolicy(retryPolicy)
      .withWriteRateLimit(4);

  app.withDefaultSystem(kafkaSystemDescriptor);
  MessageStream<String> wordStream = app.getInputStream(wordInputDescriptor);
  OutputStream<String> countStream = app.getOutputStream(countOutputDescriptor);
  app.getTable(couchbaseTableDescriptor);

  wordStream
      .flatMap(m -> Arrays.asList(m.split(" ")))
      .filter(word -> word != null && word.length() > 0)
      .map(new MyCountFunction())
      .map(countString -> currentTime() + " " + countString)
      .sendTo(countStream);
}
 
Example 20
Source File: RepartitionJoinWindowApp.java    From samza with Apache License 2.0 4 votes vote down vote up
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
  // before the application is run
  Config config = appDescriptor.getConfig();
  String inputTopic1 = config.get(INPUT_TOPIC_1_CONFIG_KEY);
  String inputTopic2 = config.get(INPUT_TOPIC_2_CONFIG_KEY);
  String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id1 = ksd.getInputDescriptor(inputTopic1, new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<AdClick> id2 = ksd.getInputDescriptor(inputTopic2, new JsonSerdeV2<>(AdClick.class));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id1);
  MessageStream<AdClick> adClicks = appDescriptor.getInputStream(id2);

  MessageStream<KV<String, PageView>> pageViewsRepartitionedByViewId = pageViews
      .partitionBy(PageView::getViewId, pv -> pv,
          new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");

  MessageStream<PageView> pageViewsRepartitionedByViewIdValueONly = pageViewsRepartitionedByViewId.map(KV::getValue);

  MessageStream<KV<String, AdClick>> adClicksRepartitionedByViewId = adClicks
      .partitionBy(AdClick::getViewId, ac -> ac,
          new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
  MessageStream<AdClick> adClicksRepartitionedByViewIdValueOnly = adClicksRepartitionedByViewId.map(KV::getValue);

  MessageStream<UserPageAdClick> userPageAdClicks = pageViewsRepartitionedByViewIdValueONly
      .join(adClicksRepartitionedByViewIdValueOnly, new UserPageViewAdClicksJoiner(),
          new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class),
          Duration.ofMinutes(1), "pageViewAdClickJoin");

  MessageStream<KV<String, UserPageAdClick>> userPageAdClicksByUserId = userPageAdClicks
      .partitionBy(UserPageAdClick::getUserId, upac -> upac,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");

  userPageAdClicksByUserId.map(KV::getValue)
      .window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow")
      .map(windowPane -> KV.of(windowPane.getKey().getKey(), String.valueOf(windowPane.getMessage().size())))
      .sink((message, messageCollector, taskCoordinator) -> {
        taskCoordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        messageCollector.send(
            new OutgoingMessageEnvelope(
                new SystemStream("kafka", outputTopic), null, message.getKey(), message.getValue()));
      });


  intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsRepartitionedByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksRepartitionedByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) userPageAdClicksByUserId).getStreamId());
}