org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor Java Examples

The following examples show how to use org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TransactionalStateMultiStoreIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Declares the job's Kafka input stream, its task factory, and two
 * {@code RocksDbTableDescriptor} tables. Each table is given an explicit changelog stream
 * and a changelog replication factor of 1.
 *
 * @param appDescriptor the task application descriptor being populated
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // One String/String serde is shared by the input stream and both stores.
  KVSerde<String, String> stringKvSerde = KVSerde.of(new StringSerde(), new StringSerde());

  KafkaSystemDescriptor inputSystem = new KafkaSystemDescriptor(INPUT_SYSTEM);
  KafkaInputDescriptor<KV<String, String>> inputDescriptor =
      inputSystem.getInputDescriptor(INPUT_TOPIC, stringKvSerde);

  RocksDbTableDescriptor<String, String> firstStore =
      new RocksDbTableDescriptor<>(STORE_1_NAME, stringKvSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);

  RocksDbTableDescriptor<String, String> secondStore =
      new RocksDbTableDescriptor<>(STORE_2_NAME, stringKvSerde)
          .withChangelogStream(STORE_2_CHANGELOG)
          .withChangelogReplicationFactor(1);

  // Every task instance is a fresh MyTask.
  StreamTaskFactory taskFactory = () -> new MyTask();

  appDescriptor
      .withInputStream(inputDescriptor)
      .withTaskFactory(taskFactory)
      .withTable(firstStore)
      .withTable(secondStore);
}
 
Example #2
Source File: TestSchedulingApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code PageView} messages from the {@code PAGE_VIEWS} stream of the "kafka" system,
 * passes them through a {@code FlatmapScheduledFn}, and asserts on the contents of the
 * resulting stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  final JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaInputDescriptor<PageView> pageViewDescriptor =
      new KafkaSystemDescriptor("kafka").getInputDescriptor(PAGE_VIEWS, pageViewSerde);

  final MessageStream<PageView> scheduledOutput =
      appDescriptor.getInputStream(pageViewDescriptor).flatMap(new FlatmapScheduledFn());

  // The four "-complete" page views are expected, in any order.
  MessageStreamAssert
      .that("Output from scheduling function should container all complete messages", scheduledOutput, pageViewSerde)
      .containsInAnyOrder(Arrays.asList(
          new PageView("v1-complete", "p1", "u1"),
          new PageView("v2-complete", "p2", "u1"),
          new PageView("v3-complete", "p1", "u2"),
          new PageView("v4-complete", "p3", "u2")));
}
 
Example #3
Source File: BroadcastAssertApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Broadcasts the {@code PageView} input stream (topic name read from the application config)
 * and asserts, per task, on the contents of the broadcast stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // The input topic is not hard-coded; it comes from the job configuration.
  String topicName = appDescriptor.getConfig().get(INPUT_TOPIC_NAME_PROP);

  final JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaInputDescriptor<PageView> inputDescriptor =
      new KafkaSystemDescriptor(SYSTEM).getInputDescriptor(topicName, pageViewSerde);

  MessageStream<PageView> inputPageViews = appDescriptor.getInputStream(inputDescriptor);
  final MessageStream<PageView> broadcastPageViews = inputPageViews.broadcast(pageViewSerde, "pv");

  // Each task is expected to see all four page view events.
  MessageStreamAssert
      .that("Each task contains all broadcast PageView events", broadcastPageViews, pageViewSerde)
      .forEachTask()
      .containsInAnyOrder(Arrays.asList(
          new PageView("v1", "p1", "u1"),
          new PageView("v2", "p2", "u1"),
          new PageView("v3", "p1", "u2"),
          new PageView("v4", "p3", "u2")));
}
 
Example #4
Source File: FilterExample.java    From samza-hello-samza with Apache License 2.0 6 votes vote down vote up
/**
 * Copies keyed {@code PageView} messages from the input stream to the output stream,
 * dropping every message whose {@code userId} equals {@code INVALID_USER_ID}.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageView> keyedPageViewSerde =
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Input and output share the same serde.
  KafkaInputDescriptor<KV<String, PageView>> source =
      kafka.getInputDescriptor(INPUT_STREAM_ID, keyedPageViewSerde);
  KafkaOutputDescriptor<KV<String, PageView>> sink =
      kafka.getOutputDescriptor(OUTPUT_STREAM_ID, keyedPageViewSerde);

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<KV<String, PageView>> allPageViews = appDescriptor.getInputStream(source);
  OutputStream<KV<String, PageView>> validPageViews = appDescriptor.getOutputStream(sink);

  allPageViews
      .filter(entry -> !INVALID_USER_ID.equals(entry.value.userId))
      .sendTo(validPageViews);
}
 
Example #5
Source File: TestAsyncFlatMap.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pipeline that asynchronously flat-maps the page view stream through
 * {@code filterGuestPageViews}, then filters it through {@code filterLoginPageViews}, and
 * sends the result to the non-guest page view output stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Config jobConfig = appDescriptor.getConfig();
  KafkaSystemDescriptor testSystem = new KafkaSystemDescriptor(TEST_SYSTEM);

  KafkaOutputDescriptor<PageView> nonGuestDescriptor =
      testSystem.getOutputDescriptor(NON_GUEST_PAGE_VIEW_STREAM, new NoOpSerde<>());
  OutputStream<PageView> nonGuestPageViewStream = appDescriptor.getOutputStream(nonGuestDescriptor);

  // Each knob consults the job config whenever it is evaluated; the intersection casts make
  // the lambdas serializable.
  Predicate<PageView> shouldFailProcess =
      (Predicate<PageView> & Serializable) unused -> jobConfig.getBoolean(FAIL_PROCESS, false);
  Predicate<PageView> shouldFailDownstream =
      (Predicate<PageView> & Serializable) unused -> jobConfig.getBoolean(FAIL_DOWNSTREAM_OPERATOR, false);
  Supplier<Long> jitterSupplier =
      (Supplier<Long> & Serializable) () -> jobConfig.getLong(PROCESS_JITTER, 100);

  appDescriptor
      .getInputStream(testSystem.getInputDescriptor(PAGE_VIEW_STREAM, new NoOpSerde<PageView>()))
      .flatMapAsync(view -> filterGuestPageViews(view, shouldFailProcess, jitterSupplier))
      .filter(view -> filterLoginPageViews(view, shouldFailDownstream))
      .sendTo(nonGuestPageViewStream);
}
 
Example #6
Source File: RepartitionWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Re-partitions page views by user id, counts them per key in 3-second keyed session
 * windows, and writes each (key, count) pair to the output topic as strings.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

  KVSerde<String, PageView> pageViewKvSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<KV<String, PageView>> source =
      kafka.getInputDescriptor(INPUT_TOPIC, pageViewKvSerde);

  KVSerde<String, String> stringKvSerde = KVSerde.of(new StringSerde(), new StringSerde());
  KafkaOutputDescriptor<KV<String, String>> sink =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, stringKvSerde);

  appDescriptor.getInputStream(source)
      .map(KV::getValue)
      .partitionBy(PageView::getUserId, view -> view, pageViewKvSerde, "p1")
      // Fold starts at 0 and adds 1 per message, i.e. a per-key message count.
      .window(
          Windows.keyedSessionWindow(
              keyed -> keyed.getKey(),
              Duration.ofSeconds(3),
              () -> 0,
              (keyed, count) -> count + 1,
              new StringSerde("UTF-8"),
              new IntegerSerde()),
          "w1")
      .map(pane -> KV.of(pane.getKey().getKey().toString(), String.valueOf(pane.getMessage())))
      .sendTo(appDescriptor.getOutputStream(sink));
}
 
Example #7
Source File: TumblingWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Drops page views whose user id equals {@code FILTER_KEY}, groups the rest into 3-second
 * keyed tumbling windows by user id, and emits (user id, number of messages in the pane).
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaInputDescriptor<PageView> source = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);

  KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaOutputDescriptor<KV<String, Integer>> sink = kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

  MessageStream<PageView> views = appDescriptor.getInputStream(source);
  OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(sink);

  views
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(
          Windows.keyedTumblingWindow(
              PageView::getUserId,
              Duration.ofSeconds(3),
              new StringSerde(),
              new JsonSerdeV2<>(PageView.class)),
          "tumblingWindow")
      // The pane's message is the collection of page views gathered for that key.
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(counts);
}
 
Example #8
Source File: SessionWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Drops page views whose user id equals {@code FILTER_KEY}, groups the rest into keyed
 * session windows (3-second duration argument) by user id, and emits
 * (user id, number of messages in the pane).
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaInputDescriptor<PageView> source = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);

  KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaOutputDescriptor<KV<String, Integer>> sink = kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

  MessageStream<PageView> views = appDescriptor.getInputStream(source);
  OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(sink);

  views
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(
          Windows.keyedSessionWindow(
              PageView::getUserId,
              Duration.ofSeconds(3),
              new StringSerde(),
              new JsonSerdeV2<>(PageView.class)),
          "sessionWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(counts);
}
 
Example #9
Source File: TransactionalStateIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Declares the job's Kafka input stream, its task factory, and a single
 * {@code RocksDbTableDescriptor} table with an explicit changelog stream and a changelog
 * replication factor of 1.
 *
 * @param appDescriptor the task application descriptor being populated
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // The same String/String serde serves the input stream and the store.
  KVSerde<String, String> stringKvSerde = KVSerde.of(new StringSerde(), new StringSerde());

  KafkaSystemDescriptor inputSystem = new KafkaSystemDescriptor(INPUT_SYSTEM);
  KafkaInputDescriptor<KV<String, String>> inputDescriptor =
      inputSystem.getInputDescriptor(INPUT_TOPIC, stringKvSerde);

  RocksDbTableDescriptor<String, String> store =
      new RocksDbTableDescriptor<>(STORE_NAME, stringKvSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);

  // Every task instance is a fresh MyTask.
  StreamTaskFactory taskFactory = () -> new MyTask();

  appDescriptor
      .withInputStream(inputDescriptor)
      .withTaskFactory(taskFactory)
      .withTable(store);
}
 
Example #10
Source File: AsyncApplicationExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Enriches ad click events asynchronously via {@code enrichAdClickEvent}, keys each enriched
 * event by its country, and sends it to the "enrichedAdClickEvent" stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<AdClickEvent> clicksDescriptor =
      tracking.getInputDescriptor("adClickEvent", new JsonSerdeV2<>(AdClickEvent.class));
  KafkaOutputDescriptor<KV<String, EnrichedAdClickEvent>> enrichedDescriptor =
      tracking.getOutputDescriptor(
          "enrichedAdClickEvent",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(EnrichedAdClickEvent.class)));

  MessageStream<AdClickEvent> clicks = appDescriptor.getInputStream(clicksDescriptor);
  OutputStream<KV<String, EnrichedAdClickEvent>> enrichedClicks =
      appDescriptor.getOutputStream(enrichedDescriptor);

  clicks
      .flatMapAsync(AsyncApplicationExample::enrichAdClickEvent)
      .map(enriched -> KV.of(enriched.getCountry(), enriched))
      .sendTo(enrichedClicks);
}
 
Example #11
Source File: AppWithGlobalConfigExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Counts page view events per member id in 10-second keyed tumbling windows, with a
 * repeating early trigger every 5 messages in discarding accumulation mode, and sends the
 * resulting {@code PageViewCount}s to the "pageViewEventPerMember" stream. Finally registers
 * an empty map of metrics reporter factories on the application.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> viewsDescriptor =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countsDescriptor =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  appDescriptor.getInputStream(viewsDescriptor)
      .window(
          // Fold starts at 0 and adds 1 per event; both serde arguments are passed as null.
          Windows.<PageViewEvent, String, Integer>keyedTumblingWindow(
                  PageViewEvent::getMemberId,
                  Duration.ofSeconds(10),
                  () -> 0,
                  (event, count) -> count + 1,
                  null,
                  null)
              .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
              .setAccumulationMode(AccumulationMode.DISCARDING),
          "window1")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(appDescriptor.getOutputStream(countsDescriptor));

  appDescriptor.withMetricsReporterFactories(new HashMap<>());
}
 
Example #12
Source File: BroadcastExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Fans the keyed "pageViewEvent" stream out to three output streams: messages with key
 * "key1" go to "outStream1", "key2" to "outStream2", and "key3" to "outStream3".
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");
  KVSerde<String, PageViewEvent> eventSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));

  KafkaInputDescriptor<KV<String, PageViewEvent>> source =
      tracking.getInputDescriptor("pageViewEvent", eventSerde);

  // All three sinks use the same serde as the input.
  KafkaOutputDescriptor<KV<String, PageViewEvent>> firstSink =
      tracking.getOutputDescriptor("outStream1", eventSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> secondSink =
      tracking.getOutputDescriptor("outStream2", eventSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> thirdSink =
      tracking.getOutputDescriptor("outStream3", eventSerde);

  MessageStream<KV<String, PageViewEvent>> events = appDescriptor.getInputStream(source);

  events
      .filter(entry -> entry.key.equals("key1"))
      .sendTo(appDescriptor.getOutputStream(firstSink));
  events
      .filter(entry -> entry.key.equals("key2"))
      .sendTo(appDescriptor.getOutputStream(secondSink));
  events
      .filter(entry -> entry.key.equals("key3"))
      .sendTo(appDescriptor.getOutputStream(thirdSink));
}
 
Example #13
Source File: WindowExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Counts page view events in 10-minute tumbling windows and sends each window pane's count
 * to the "pageViewEventPerMember" stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> viewsDescriptor =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<Integer> countsDescriptor =
      tracking.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde());

  SupplierFunction<Integer> startAtZero = () -> 0;
  // Treats a null running count as "no messages yet", so the first message yields 1.
  FoldLeftFunction<PageViewEvent, Integer> countMessages = (event, runningCount) -> {
    if (runningCount == null) {
      return 1;
    }
    return runningCount + 1;
  };

  MessageStream<PageViewEvent> views = appDescriptor.getInputStream(viewsDescriptor);
  OutputStream<Integer> counts = appDescriptor.getOutputStream(countsDescriptor);

  // Tumbling window of 10 minutes that folds messages into a count. The trigger registered
  // via setLateTrigger fires when either 30000 messages have been counted or 1 minute has
  // passed since the last message.
  views
      .window(
          Windows.tumblingWindow(Duration.ofMinutes(10), startAtZero, countMessages, new IntegerSerde())
              .setLateTrigger(
                  Triggers.any(
                      Triggers.count(30000),
                      Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))),
          "window")
      .map(WindowPane::getMessage)
      .sendTo(counts);
}
 
Example #14
Source File: MergeExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Merges the three keyed page view input streams ("pageViewStream1" through
 * "pageViewStream3") into one stream and sends it to "mergedStream".
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");
  KVSerde<String, PageViewEvent> eventSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));

  // Three inputs and the single output all share one serde.
  KafkaInputDescriptor<KV<String, PageViewEvent>> firstSource =
      tracking.getInputDescriptor("pageViewStream1", eventSerde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> secondSource =
      tracking.getInputDescriptor("pageViewStream2", eventSerde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> thirdSource =
      tracking.getInputDescriptor("pageViewStream3", eventSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> mergedSink =
      tracking.getOutputDescriptor("mergedStream", eventSerde);

  MessageStream
      .mergeAll(
          ImmutableList.of(
              appDescriptor.getInputStream(firstSource),
              appDescriptor.getInputStream(secondSource),
              appDescriptor.getInputStream(thirdSource)))
      .sendTo(appDescriptor.getOutputStream(mergedSink));
}
 
Example #15
Source File: SamzaSumDemo.java    From scotty-window-processor with Apache License 2.0 6 votes vote down vote up
/**
 * Starts the demo Kafka producer thread, then declares one Kafka input stream, one Kafka
 * output stream (both with Integer/Integer key-value serdes), and the task factory.
 *
 * @param appDescriptor the task application descriptor being populated
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
    // DemoKafkaProducer is used as a Thread here; it is constructed with the input stream name.
    Thread demoSource = new DemoKafkaProducer(INPUT_DESCRIPTOR_NAME);
    demoSource.start();

    KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM_DESCRIPTOR_NAME)
            .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
            .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
            .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

    // Wildcard-parameterized rather than raw, so the declarations carry no raw-type warnings.
    KafkaInputDescriptor<?> kid =
            ksd.getInputDescriptor(INPUT_DESCRIPTOR_NAME, KVSerde.of(new IntegerSerde(), new IntegerSerde()));
    KafkaOutputDescriptor<?> kod =
            ksd.getOutputDescriptor(OUTPUT_DESCRIPTOR_NAME, KVSerde.of(new IntegerSerde(), new IntegerSerde()));

    appDescriptor
            .withInputStream(kid)
            .withOutputStream(kod);

    appDescriptor.withTaskFactory(new DemoTaskFactory(SYSTEM_DESCRIPTOR_NAME, OUTPUT_DESCRIPTOR_NAME));
}
 
Example #16
Source File: PageViewCounterExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Counts page view events per member id in 10-second keyed tumbling windows, with a
 * repeating early trigger every 5 messages in discarding accumulation mode, and sends the
 * resulting {@code PageViewCount}s to the "pageViewEventPerMember" stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> viewsDescriptor =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countsDescriptor =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  MessageStream<PageViewEvent> views = appDescriptor.getInputStream(viewsDescriptor);
  OutputStream<KV<String, PageViewCount>> countsPerMember =
      appDescriptor.getOutputStream(countsDescriptor);

  // The fold starts at 0 and adds 1 for every event in the window.
  SupplierFunction<Integer> startAtZero = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> addOne = (event, count) -> count + 1;

  views
      .window(
          Windows.keyedTumblingWindow(
                  PageViewEvent::getMemberId, Duration.ofSeconds(10), startAtZero, addOne, null, null)
              .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
              .setAccumulationMode(AccumulationMode.DISCARDING),
          "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(countsPerMember);
}
 
Example #17
Source File: KeyValueStoreExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Re-partitions page view events by member id, runs them through {@code MyStatsCounter},
 * keys each resulting stats record by its member id, and sends it to the
 * "pageViewEventPerMember" stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> viewsDescriptor =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> statsDescriptor =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  appDescriptor.withDefaultSystem(tracking);

  MessageStream<PageViewEvent> views = appDescriptor.getInputStream(viewsDescriptor);
  OutputStream<KV<String, StatsOutput>> statsPerMember = appDescriptor.getOutputStream(statsDescriptor);

  views
      .partitionBy(
          event -> event.getMemberId(),
          event -> event,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)),
          "partitionBy")
      // Only the event itself is needed downstream; the partition key is discarded.
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(memberStats -> KV.of(memberStats.memberId, memberStats))
      .sendTo(statsPerMember);
}
 
Example #18
Source File: RepartitionExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Re-partitions page view events by member id, counts them per key in 5-minute keyed
 * tumbling windows, and sends a {@code MyStreamOutput} per window pane to the
 * "pageViewEventPerMember" stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> viewsDescriptor =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, MyStreamOutput>> outputDescriptor =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(MyStreamOutput.class)));

  appDescriptor.withDefaultSystem(tracking);

  MessageStream<PageViewEvent> views = appDescriptor.getInputStream(viewsDescriptor);
  OutputStream<KV<String, MyStreamOutput>> outputPerMember =
      appDescriptor.getOutputStream(outputDescriptor);

  views
      .partitionBy(
          event -> event.getMemberId(),
          event -> event,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)),
          "partitionBy")
      // Fold starts at 0 and adds 1 per message; both serde arguments are passed as null.
      .window(
          Windows.keyedTumblingWindow(
              KV::getKey, Duration.ofMinutes(5), () -> 0, (keyed, count) -> count + 1, null, null),
          "window")
      .map(pane -> KV.of(pane.getKey().getKey(), new MyStreamOutput(pane)))
      .sendTo(outputPerMember);
}
 
Example #19
Source File: OrderShipmentJoinExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Joins the "orders" stream with the "shipments" stream using {@code MyJoinFunction} and a
 * 1-minute duration argument, keys each joined record by its order id, and sends it to the
 * "fulfilledOrders" stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<OrderRecord> ordersDescriptor =
      tracking.getInputDescriptor("orders", new JsonSerdeV2<>(OrderRecord.class));
  KafkaInputDescriptor<ShipmentRecord> shipmentsDescriptor =
      tracking.getInputDescriptor("shipments", new JsonSerdeV2<>(ShipmentRecord.class));
  KafkaOutputDescriptor<KV<String, FulfilledOrderRecord>> fulfilledDescriptor =
      tracking.getOutputDescriptor(
          "fulfilledOrders",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(FulfilledOrderRecord.class)));

  appDescriptor.getInputStream(ordersDescriptor)
      .join(
          appDescriptor.getInputStream(shipmentsDescriptor),
          new MyJoinFunction(),
          new StringSerde(),
          new JsonSerdeV2<>(OrderRecord.class),
          new JsonSerdeV2<>(ShipmentRecord.class),
          Duration.ofMinutes(1),
          "join")
      .map(fulfilled -> KV.of(fulfilled.orderId, fulfilled))
      .sendTo(appDescriptor.getOutputStream(fulfilledDescriptor));
}
 
Example #20
Source File: StreamTableJoinExample.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Loads profiles into a {@code RocksDbTableDescriptor} table keyed by user id, re-partitions
 * page views by user id, joins them against that table with {@code JoinFn}, and sends the
 * join result to the output topic.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> enrichedSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profilesDescriptor =
      kafka.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewsDescriptor =
      kafka.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> enrichedDescriptor =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, enrichedSerde);

  RocksDbTableDescriptor<String, Profile> profilesTableDescriptor =
      new RocksDbTableDescriptor<String, Profile>(
          "profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<Profile> profiles = appDescriptor.getInputStream(profilesDescriptor);
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewsDescriptor);
  OutputStream<EnrichedPageView> enrichedPageViews = appDescriptor.getOutputStream(enrichedDescriptor);
  Table<KV<String, Profile>> profilesTable = appDescriptor.getTable(profilesTableDescriptor);

  // Populate the table: each profile is stored under its user id.
  profiles
      .map(p -> KV.of(p.userId, p))
      .sendTo(profilesTable);

  // Key page views by user id as well, then look each one up in the profile table.
  pageViews
      .partitionBy(view -> view.userId, view -> view, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profilesTable, new JoinFn())
      .sendTo(enrichedPageViews);
}
 
Example #21
Source File: AzureBlobApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code PageView} messages from Kafka, logs each one, converts it to a
 * {@code PageViewAvroRecord}, and sends it to an output stream of the system created with
 * the {@code AzureBlobSystemFactory} factory class name.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Kafka: the input system
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);
  KafkaInputDescriptor<PageView> pageViewsDescriptor =
      kafka.getInputDescriptor(INPUT_PAGEVIEW_STREAM_ID, new JsonSerdeV2<>(PageView.class));

  // Azure Blob Storage: the output system, declared generically by factory class name
  GenericSystemDescriptor azureBlob =
      new GenericSystemDescriptor(OUTPUT_SYSTEM, "org.apache.samza.system.azureblob.AzureBlobSystemFactory");
  GenericOutputDescriptor<PageViewAvroRecord> azureBlobOutputDescriptor =
      azureBlob.getOutputDescriptor(OUTPUT_STREAM, new NoOpSerde<>());

  // Kafka is registered as the job's default system
  appDescriptor.withDefaultSystem(kafka);

  // Materialize the streams from their descriptors
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewsDescriptor);
  OutputStream<PageViewAvroRecord> avroRecords = appDescriptor.getOutputStream(azureBlobOutputDescriptor);

  // Log each page view, then convert it and forward it to the output stream
  pageViews
      .map(pageView -> {
        LOG.info("Sending: Received PageViewEvent with pageId: " + pageView.pageId);
        return PageViewAvroRecord.buildPageViewRecord(pageView);
      })
      .sendTo(avroRecords);
}
 
Example #22
Source File: RemoteTableJoinExample.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Reads stock symbols from Kafka, joins each one against a cached remote table using
 * {@code JoinFn}, and sends the resulting {@code StockPrice}s to the output stream.
 *
 * @param appDescriptor the stream application descriptor being populated
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> stockSymbolInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());
  KafkaOutputDescriptor<StockPrice> stockPriceOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, new JsonSerdeV2<>(StockPrice.class));
  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);
  MessageStream<String> stockSymbolStream = appDescriptor.getInputStream(stockSymbolInputDescriptor);
  OutputStream<StockPrice> stockPriceStream = appDescriptor.getOutputStream(stockPriceOutputDescriptor);

  // Explicit type arguments: a raw constructor call here would turn the whole builder chain
  // into raw types and make the assignment an unchecked conversion. The diamond cannot be
  // used because the chained calls give the constructor no target type to infer from.
  RemoteTableDescriptor<String, Double> remoteTableDescriptor =
      new RemoteTableDescriptor<String, Double>("remote-table")
          .withReadRateLimit(10)
          .withReadFunction(new StockPriceReadFunction());
  // Wrap the remote table in a caching table whose read TTL is 5 seconds.
  CachingTableDescriptor<String, Double> cachedRemoteTableDescriptor =
      new CachingTableDescriptor<>("cached-remote-table", remoteTableDescriptor)
          .withReadTtl(Duration.ofSeconds(5));
  Table<KV<String, Double>> cachedRemoteTable = appDescriptor.getTable(cachedRemoteTableDescriptor);

  stockSymbolStream
      // The symbol becomes the join key; the value is left null.
      .map(symbol -> new KV<String, Void>(symbol, null))
      .join(cachedRemoteTable, new JoinFn())
      .sendTo(stockPriceStream);

}
 
Example #23
Source File: WikipediaStatsTaskApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Declares the "wikipedia-edits" Kafka input, the "wikipedia-stats" Kafka output, Kafka as
 * the default system, and a task factory producing {@code WikipediaStatsStreamTask}s.
 *
 * @param taskApplicationDescriptor the task application descriptor being populated
 */
@Override
public void describe(TaskApplicationDescriptor taskApplicationDescriptor) {

  // Define a system descriptor for Kafka
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Input descriptor for the wikipedia-edits topic.
  // Wildcard-parameterized rather than raw, so the declaration carries no raw-type warning.
  KafkaInputDescriptor<?> kafkaInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor("wikipedia-edits", new JsonSerde<>());

  // Set the default system descriptor to Kafka, so that it is used for all
  // internal resources, e.g., kafka topic for checkpointing, coordinator stream.
  taskApplicationDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  // Set the input
  taskApplicationDescriptor.withInputStream(kafkaInputDescriptor);

  // Set the output
  taskApplicationDescriptor.withOutputStream(
      kafkaSystemDescriptor.getOutputDescriptor("wikipedia-stats", new JsonSerde<>()));

  // Set the task factory
  taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) () -> new WikipediaStatsStreamTask());
}
 
Example #24
Source File: WikipediaParserTaskApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Declares the "wikipedia-raw" Kafka input, the "wikipedia-edits" Kafka output, Kafka as
 * the default system, and a task factory producing {@code WikipediaParserStreamTask}s.
 *
 * @param taskApplicationDescriptor the task application descriptor being populated
 */
@Override
public void describe(TaskApplicationDescriptor taskApplicationDescriptor) {

  // Define a system descriptor for Kafka, which is both our input and output system
  KafkaSystemDescriptor kafkaSystemDescriptor =
      new KafkaSystemDescriptor("kafka").withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Input descriptor for the wikipedia-raw topic.
  // Wildcard-parameterized rather than raw, so the declaration carries no raw-type warning.
  KafkaInputDescriptor<?> kafkaInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor("wikipedia-raw", new JsonSerde<>());

  // Output descriptor for the wikipedia-edits topic (wildcard-parameterized for the same reason)
  KafkaOutputDescriptor<?> kafkaOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor("wikipedia-edits", new JsonSerde<>());

  // Set the default system descriptor to Kafka, so that it is used for all
  // internal resources, e.g., kafka topic for checkpointing, coordinator stream.
  taskApplicationDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  // Set the input
  taskApplicationDescriptor.withInputStream(kafkaInputDescriptor);

  // Set the output
  taskApplicationDescriptor.withOutputStream(kafkaOutputDescriptor);

  // Set the task factory
  taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) () -> new WikipediaParserStreamTask());
}
 
Example #25
Source File: WikipediaFeedTaskApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Declares three Wikipedia input streams (one per channel), the "wikipedia-raw" Kafka
 * output, Kafka as the default system, and a task factory producing
 * {@code WikipediaFeedStreamTask}s.
 *
 * @param taskApplicationDescriptor the task application descriptor being populated
 */
@Override
public void describe(TaskApplicationDescriptor taskApplicationDescriptor) {

  // Define a SystemDescriptor for Wikipedia data
  WikipediaSystemDescriptor wikipediaSystemDescriptor = new WikipediaSystemDescriptor("irc.wikimedia.org", 6667);

  // Define InputDescriptors for consuming wikipedia data
  WikipediaInputDescriptor wikipediaInputDescriptor =
      wikipediaSystemDescriptor.getInputDescriptor("en-wikipedia").withChannel("#en.wikipedia");
  WikipediaInputDescriptor wiktionaryInputDescriptor =
      wikipediaSystemDescriptor.getInputDescriptor("en-wiktionary").withChannel("#en.wiktionary");
  WikipediaInputDescriptor wikiNewsInputDescriptor =
      wikipediaSystemDescriptor.getInputDescriptor("en-wikinews").withChannel("#en.wikinews");

  // Define a system descriptor for Kafka, which is our output system
  KafkaSystemDescriptor kafkaSystemDescriptor =
      new KafkaSystemDescriptor("kafka").withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Define an output descriptor.
  // Wildcard-parameterized rather than raw, so the declaration carries no raw-type warning.
  KafkaOutputDescriptor<?> kafkaOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor("wikipedia-raw", new JsonSerde<>());

  // Set the default system descriptor to Kafka, so that it is used for all
  // internal resources, e.g., kafka topic for checkpointing, coordinator stream.
  taskApplicationDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  // Set the inputs
  taskApplicationDescriptor.withInputStream(wikipediaInputDescriptor);
  taskApplicationDescriptor.withInputStream(wiktionaryInputDescriptor);
  taskApplicationDescriptor.withInputStream(wikiNewsInputDescriptor);

  // Set the output
  taskApplicationDescriptor.withOutputStream(kafkaOutputDescriptor);

  // Set the task factory
  taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) () -> new WikipediaFeedStreamTask());
}
 
Example #26
Source File: FaultInjectionTest.java    From samza with Apache License 2.0 (5 votes)
/**
 * Registers one Kafka input of {@code PageView} messages, whose topic name is read from the
 * application config, together with a task factory that creates {@code FaultInjectionTask}
 * instances sharing {@code containerShutdownLatch}.
 *
 * @param appDescriptor the descriptor on which the input stream and task factory are registered
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // The topic to consume comes from the job configuration rather than a constant
  final String topicName = appDescriptor.getConfig().get(INPUT_TOPIC_NAME_PROP);
  final JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);

  KafkaInputDescriptor<PageView> pageViewInput =
      new KafkaSystemDescriptor(SYSTEM).getInputDescriptor(topicName, pageViewSerde);

  appDescriptor
      .withInputStream(pageViewInput)
      .withTaskFactory((StreamTaskFactory) () -> new FaultInjectionTask(containerShutdownLatch));
}
 
Example #27
Source File: StreamTaskIntegrationTest.java    From samza with Apache License 2.0 (5 votes)
/**
 * Registers the "Profile" and "PageView" inputs and the "EnrichedPageView" output on the "test"
 * Kafka system, an in-memory table named "profile-view-store", and a task factory that creates
 * {@code StatefulStreamTask} instances.
 *
 * @param appDescriptor the descriptor on which streams, table and task factory are registered
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("test");

  // Inputs
  KafkaInputDescriptor<Profile> profileInput = kafka.getInputDescriptor("Profile", new JsonSerdeV2<>());
  KafkaInputDescriptor<PageView> pageViewInput = kafka.getInputDescriptor("PageView", new JsonSerdeV2<>());

  // Output
  KafkaOutputDescriptor<EnrichedPageView> enrichedOutput =
      kafka.getOutputDescriptor("EnrichedPageView", new NoOpSerde<>());

  // Each call to the factory yields a fresh task instance
  StreamTaskFactory taskFactory = () -> new StatefulStreamTask();

  appDescriptor
      .withInputStream(profileInput)
      .withInputStream(pageViewInput)
      .withOutputStream(enrichedOutput)
      .withTable(
          new InMemoryTableDescriptor(
              "profile-view-store",
              KVSerde.of(new IntegerSerde(), new TestTableData.ProfileJsonSerde())))
      .withTaskFactory(taskFactory);
}
 
Example #28
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0 (5 votes)
/**
 * Reads key/value page views from the "PageView" stream of the "test" Kafka system, repartitions
 * the values by member id, and sends every repartitioned entry to the "Output" stream of the
 * "test" system.
 *
 * @param appDescriptor the descriptor used to obtain the input stream
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, PageView>> pageViewInput =
      kafka.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));

  MessageStream<KV<String, TestData.PageView>> keyedPageViews = appDescriptor.getInputStream(pageViewInput);
  keyedPageViews
      .map(KV::getValue)
      .partitionBy(
          PageView::getMemberId,
          pageView -> pageView,
          KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(PageView.class)),
          "p1")
      .sink((entry, collector, coordinator) -> {
        // The entry's key fills both key arguments of the envelope; the entry itself is sent last
        OutgoingMessageEnvelope envelope = new OutgoingMessageEnvelope(
            new SystemStream("test", "Output"), entry.getKey(), entry.getKey(), entry);
        collector.send(envelope);
      });
}
 
Example #29
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0 (5 votes)
/**
 * Reads key/value page views from the "PageView" stream of the "test" Kafka system, drops the
 * keys, and keeps only the page views whose page key equals "inbox".
 *
 * @param appDescriptor the descriptor used to obtain the input stream
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, PageView>> pageViewInput =
      kafka.getInputDescriptor("PageView", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));

  MessageStream<KV<String, TestData.PageView>> keyedPageViews = appDescriptor.getInputStream(pageViewInput);

  // Strip the keys, then filter on the page key
  keyedPageViews
      .map(KV::getValue)
      .filter(pageView -> pageView.getPageKey().equals("inbox"));
}
 
Example #30
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0 (5 votes)
/**
 * Builds a stream-table join pipeline on the "test" Kafka system:
 * profiles from the "Profile" stream are written into the "profile-view-store" table, while
 * page views from the "PageView" stream are repartitioned by member id, joined against that
 * table, sent to the "EnrichedPageView" output, and their page keys forwarded to "JoinPageKeys".
 *
 * @param appDescriptor the descriptor used to obtain the table and the input/output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Table holding profiles keyed by an Integer
  RocksDbTableDescriptor<Integer, TestTableData.Profile> profileTableDescriptor =
      new RocksDbTableDescriptor<Integer, TestTableData.Profile>("profile-view-store",
          KVSerde.of(new IntegerSerde(), new TestTableData.ProfileJsonSerde()));
  Table<KV<Integer, TestTableData.Profile>> profileTable = appDescriptor.getTable(profileTableDescriptor);

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("test");

  KafkaInputDescriptor<KV<String, TestTableData.Profile>> profileInput =
      kafka.getInputDescriptor("Profile", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));

  KafkaInputDescriptor<KV<String, TestTableData.PageView>> pageViewInput =
      kafka.getInputDescriptor("PageView", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));
  KafkaOutputDescriptor<TestTableData.EnrichedPageView> enrichedOutput =
      kafka.getOutputDescriptor("EnrichedPageView", new JsonSerdeV2<>());

  // Profile side: re-key each profile by its member id, store it in the table, and log the insert
  appDescriptor.getInputStream(profileInput)
      .map(profileEntry -> new KV(profileEntry.getValue().getMemberId(), profileEntry.getValue()))
      .sendTo(profileTable)
      .sink((stored, collector, coordinator) ->
          LOG.info("Inserted Profile with Key: {} in profile-view-store", stored.getKey()));

  // Page-view side: repartition by member id, join with the profile table, emit the results
  OutputStream<TestTableData.EnrichedPageView> enrichedStream = appDescriptor.getOutputStream(enrichedOutput);
  appDescriptor.getInputStream(pageViewInput)
      .partitionBy(
          pageViewEntry -> pageViewEntry.getValue().getMemberId(),
          pageViewEntry -> pageViewEntry.getValue(),
          KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(TestTableData.PageView.class)),
          "p1")
      .join(profileTable, new PageViewToProfileJoinFunction())
      .sendTo(enrichedStream)
      .map(TestTableData.EnrichedPageView::getPageKey)
      .sink((pageKey, collector, coordinator) ->
          collector.send(
              new OutgoingMessageEnvelope(new SystemStream("test", "JoinPageKeys"), null, null, pageKey)));
}