Java Code Examples for org.apache.samza.application.descriptors.StreamApplicationDescriptor#getConfig()

The following examples show how to use org.apache.samza.application.descriptors.StreamApplicationDescriptor#getConfig(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestAsyncFlatMap.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Declares the page-view pipeline: messages read from {@code PAGE_VIEW_STREAM} go through the
 * asynchronous {@code filterGuestPageViews} step, then the {@code filterLoginPageViews} filter,
 * and the remaining messages are sent to {@code NON_GUEST_PAGE_VIEW_STREAM}.
 *
 * @param appDescriptor descriptor that supplies the config and on which the streams are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Config appConfig = appDescriptor.getConfig();
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(TEST_SYSTEM);

  // Output side: the stream that receives the page views surviving both filter steps.
  KafkaOutputDescriptor<PageView> nonGuestOutput =
      kafka.getOutputDescriptor(NON_GUEST_PAGE_VIEW_STREAM, new NoOpSerde<>());
  OutputStream<PageView> nonGuestSink = appDescriptor.getOutputStream(nonGuestOutput);

  // The knobs below are looked up in the config every time they are invoked; the intersection
  // casts make the lambdas Serializable.
  Predicate<PageView> shouldFailProcess =
      (Predicate<PageView> & Serializable) (unused) -> appConfig.getBoolean(FAIL_PROCESS, false);
  Predicate<PageView> shouldFailDownstream =
      (Predicate<PageView> & Serializable) (unused) -> appConfig.getBoolean(FAIL_DOWNSTREAM_OPERATOR, false);
  Supplier<Long> jitterSupplier =
      (Supplier<Long> & Serializable) () -> appConfig.getLong(PROCESS_JITTER, 100);

  appDescriptor
      .getInputStream(kafka.getInputDescriptor(PAGE_VIEW_STREAM, new NoOpSerde<PageView>()))
      .flatMapAsync(view -> filterGuestPageViews(view, shouldFailProcess, jitterSupplier))
      .filter(view -> filterLoginPageViews(view, shouldFailDownstream))
      .sendTo(nonGuestSink);
}
 
Example 2
Source File: BroadcastAssertApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Reads page views from the topic named by the {@code INPUT_TOPIC_NAME_PROP} config entry,
 * broadcasts them, and registers an assertion that every task sees the four expected events.
 *
 * @param appDescriptor descriptor that supplies the config and on which the streams are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  String pageViewTopic = appDescriptor.getConfig().get(INPUT_TOPIC_NAME_PROP);

  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaInputDescriptor<PageView> pageViewInput =
      new KafkaSystemDescriptor(SYSTEM).getInputDescriptor(pageViewTopic, pageViewSerde);

  MessageStream<PageView> broadcastViews =
      appDescriptor.getInputStream(pageViewInput).broadcast(pageViewSerde, "pv");

  // Each task will see all the pageview events
  MessageStreamAssert
      .that("Each task contains all broadcast PageView events", broadcastViews, pageViewSerde)
      .forEachTask()
      .containsInAnyOrder(Arrays.asList(
          new PageView("v1", "p1", "u1"),
          new PageView("v2", "p2", "u1"),
          new PageView("v3", "p1", "u2"),
          new PageView("v4", "p3", "u2")));
}
 
Example 3
Source File: SamzaSqlApplication.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the stream graph for the SQL statements found in the application config: fetches the
 * statements, obtains one {@code RelRoot} per query, translates each of them onto
 * {@code appDescriptor}, and installs a task context factory that exposes the per-query
 * translator contexts. Any {@link RuntimeException} is logged and rethrown unchanged.
 *
 * @param appDescriptor descriptor that supplies the config and receives the translated operators
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  try {
    // TODO: Introduce an API to return a dsl string containing one or more sql statements.
    List<String> sqlStatements = SamzaSqlDslConverter.fetchSqlFromConfig(appDescriptor.getConfig());

    // Step 1: get the Calcite plan; the two lists are filled in by the call below.
    List<String> sourceStreams = new LinkedList<>();
    List<String> sinkStreams = new LinkedList<>();
    Collection<RelRoot> plans = SamzaSqlApplicationConfig.populateSystemStreamsAndGetRelRoots(
        sqlStatements, appDescriptor.getConfig(), sourceStreams, sinkStreams);

    // Step 2: populate configs.
    SamzaSqlApplicationConfig sqlConfig =
        new SamzaSqlApplicationConfig(appDescriptor.getConfig(), sourceStreams, sinkStreams);

    // Step 3: translate the Calcite plan to Samza stream operators.
    QueryTranslator translator = new QueryTranslator(appDescriptor, sqlConfig);
    SamzaSqlExecutionContext sqlExecutionContext = new SamzaSqlExecutionContext(sqlConfig);
    Map<Integer, TranslatorContext> contextsByQueryId = new HashMap<>();
    // The query id is the position of the query among the statements and must start at 0.
    int queryId = 0;
    for (RelRoot plan : plans) {
      LOG.info("Translating relRoot {} to samza stream graph with queryId {}", plan, queryId);
      TranslatorContext context = new TranslatorContext(appDescriptor, plan, sqlExecutionContext);
      contextsByQueryId.put(queryId, context);
      translator.translate(plan, sqlConfig.getOutputSystemStreams().get(queryId), context, queryId);
      queryId++;
    }

    // Step 4: set all translator contexts.
    /*
     * TODO When serialization of ApplicationDescriptor is actually needed, then something will need to be updated
     * here, since translatorContext is not Serializable. Currently, a new ApplicationDescriptor instance is created
     * in each container, so it does not need to be serialized. Therefore, the translatorContext is recreated in
     * each container and does not need to be serialized.
     */
    appDescriptor.withApplicationTaskContextFactory(new ApplicationTaskContextFactory<SamzaSqlApplicationContext>() {
      @Override
      public SamzaSqlApplicationContext create(ExternalContext externalContext, JobContext jobContext,
          ContainerContext containerContext, TaskContext taskContext,
          ApplicationContainerContext applicationContainerContext) {
        // The context arguments are not used; every created context wraps the same map.
        return new SamzaSqlApplicationContext(contextsByQueryId);
      }
    });
  } catch (RuntimeException e) {
    LOG.error("SamzaSqlApplication threw exception.", e);
    throw e;
  }
}
 
Example 4
Source File: RepartitionJoinWindowApp.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Declares a pipeline that repartitions page views and ad clicks by view id, joins them,
 * repartitions the joined records by user id, counts them per keyed session window, and sends
 * each count to the configured output topic. The ids of the three repartitioned streams are
 * recorded in {@code intermediateStreamIds}.
 *
 * @param appDescriptor descriptor that supplies the config and on which the streams are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
  // before the application is run
  Config appConfig = appDescriptor.getConfig();
  String pageViewTopic = appConfig.get(INPUT_TOPIC_1_CONFIG_KEY);
  String adClickTopic = appConfig.get(INPUT_TOPIC_2_CONFIG_KEY);
  String outputTopic = appConfig.get(OUTPUT_TOPIC_CONFIG_KEY);

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> pageViewInput =
      kafka.getInputDescriptor(pageViewTopic, new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<AdClick> adClickInput =
      kafka.getInputDescriptor(adClickTopic, new JsonSerdeV2<>(AdClick.class));

  MessageStream<PageView> rawPageViews = appDescriptor.getInputStream(pageViewInput);
  MessageStream<AdClick> rawAdClicks = appDescriptor.getInputStream(adClickInput);

  // Repartition both inputs by view id, then drop the key again.
  MessageStream<KV<String, PageView>> pageViewsByViewId = rawPageViews.partitionBy(
      PageView::getViewId,
      view -> view,
      new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)),
      "pageViewsByViewId");
  MessageStream<PageView> repartitionedPageViews = pageViewsByViewId.map(KV::getValue);

  MessageStream<KV<String, AdClick>> adClicksByViewId = rawAdClicks.partitionBy(
      AdClick::getViewId,
      click -> click,
      new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)),
      "adClicksByViewId");
  MessageStream<AdClick> repartitionedAdClicks = adClicksByViewId.map(KV::getValue);

  // Join the two repartitioned streams.
  MessageStream<UserPageAdClick> joined = repartitionedPageViews.join(
      repartitionedAdClicks,
      new UserPageViewAdClicksJoiner(),
      new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class),
      Duration.ofMinutes(1),
      "pageViewAdClickJoin");

  // Repartition the joined records by user id.
  MessageStream<KV<String, UserPageAdClick>> joinedByUserId = joined.partitionBy(
      UserPageAdClick::getUserId,
      record -> record,
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)),
      "userPageAdClicksByUserId");

  // Per keyed session window, emit (user id, number of messages in the pane as a string).
  joinedByUserId
      .map(KV::getValue)
      .window(
          Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3),
              new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)),
          "userAdClickWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), String.valueOf(pane.getMessage().size())))
      .sink((countByUser, collector, coordinator) -> {
        // The commit is requested before the message is sent.
        coordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        SystemStream destination = new SystemStream("kafka", outputTopic);
        collector.send(
            new OutgoingMessageEnvelope(destination, null, countByUser.getKey(), countByUser.getValue()));
      });

  // Record the stream ids of the three repartitioned streams.
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) joinedByUserId).getStreamId());
}