org.apache.samza.operators.OutputStream Java Examples

The following examples show how to use org.apache.samza.operators.OutputStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestAsyncFlatMap.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a pipeline that reads page views from {@code PAGE_VIEW_STREAM}, passes them
 * through the async {@code filterGuestPageViews} flat-map and the {@code filterLoginPageViews}
 * filter, and sends the survivors to {@code NON_GUEST_PAGE_VIEW_STREAM}.
 *
 * <p>The failure switches and the jitter value are read lazily from the application config
 * each time the corresponding predicate or supplier is invoked.
 *
 * @param appDescriptor descriptor used to obtain the config and to register the streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  final Config appConfig = appDescriptor.getConfig();
  final KafkaSystemDescriptor systemDescriptor = new KafkaSystemDescriptor(TEST_SYSTEM);

  KafkaOutputDescriptor<PageView> nonGuestDescriptor =
      systemDescriptor.getOutputDescriptor(NON_GUEST_PAGE_VIEW_STREAM, new NoOpSerde<>());
  OutputStream<PageView> nonGuestOutput = appDescriptor.getOutputStream(nonGuestDescriptor);

  // The intersection casts make the lambdas serializable.
  Predicate<PageView> shouldFailProcess =
      (Predicate<PageView> & Serializable) (unused) -> appConfig.getBoolean(FAIL_PROCESS, false);
  Predicate<PageView> shouldFailDownstream =
      (Predicate<PageView> & Serializable) (unused) -> appConfig.getBoolean(FAIL_DOWNSTREAM_OPERATOR, false);
  Supplier<Long> jitterSupplier =
      (Supplier<Long> & Serializable) () -> appConfig.getLong(PROCESS_JITTER, 100);

  appDescriptor
      .getInputStream(systemDescriptor.getInputDescriptor(PAGE_VIEW_STREAM, new NoOpSerde<PageView>()))
      .flatMapAsync(view -> filterGuestPageViews(view, shouldFailProcess, jitterSupplier))
      .filter(view -> filterLoginPageViews(view, shouldFailDownstream))
      .sendTo(nonGuestOutput);
}
 
Example #2
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Plans an input4 -> partitionBy -> map -> output1 graph and asserts that every intermediate
 * stream in the resulting job graph has
 * {@code IntermediateStreamManager.MAX_INFERRED_PARTITIONS} partitions.
 */
@Test
public void testMaxPartitionLimit() {
  final int expectedPartitions = IntermediateStreamManager.MAX_INFERRED_PARTITIONS;

  ExecutionPlanner executionPlanner = new ExecutionPlanner(config, streamManager);
  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(desc -> {
    MessageStream<KV<Object, Object>> source = desc.getInputStream(input4Descriptor);
    OutputStream<KV<Object, Object>> sink = desc.getOutputStream(output1Descriptor);
    source
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .map(pair -> pair)
        .sendTo(sink);
  }, config);

  JobGraph plannedGraph = (JobGraph) executionPlanner.plan(appDescriptor);

  // Partitions should be the same as input1 (max of input1 and output1).
  plannedGraph.getIntermediateStreams()
      .forEach(streamEdge -> assertEquals(expectedPartitions, streamEdge.getPartitionCount()));
}
 
Example #3
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Example stream-table join that is invalid due to disagreement in partition count between the
 * stream behind table t and another joined stream. Table t is configured with input2 (16) as
 * side-input stream.
 *
 * <pre>
 *                   join-table t -> output1 (8)
 *                         |
 *    input1 (64) ---------
 * </pre>
 *
 * @return the application descriptor holding the graph sketched above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input1Stream = descriptor.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = descriptor.getOutputStream(output1Descriptor);

    // Mock local table whose only side input is "input2".
    TableDescriptor sideInputTableDescriptor =
        new TestLocalTableDescriptor.MockLocalTableDescriptor(
            "table-id", new KVSerde(new StringSerde(), new StringSerde()))
            .withSideInputs(Arrays.asList("input2"))
            .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table sideInputTable = descriptor.getTable(sideInputTableDescriptor);

    input1Stream.join(sideInputTable, mock(StreamTableJoinFunction.class)).sendTo(joinOutput);
  }, config);
}
 
Example #4
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Example stream-table join where table t is configured with input1 (64) as a side-input stream.
 *
 * <pre>
 *                                   join-table t -> output1 (8)
 *                                        |
 *    input2 (16) -> partitionBy ("64") __|
 * </pre>
 *
 * @return the application descriptor holding the graph sketched above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinWithSideInputs() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input2Stream = descriptor.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = descriptor.getOutputStream(output1Descriptor);

    // Mock local table whose only side input is "input1".
    TableDescriptor sideInputTableDescriptor =
        new TestLocalTableDescriptor.MockLocalTableDescriptor(
            "table-id", new KVSerde(new StringSerde(), new StringSerde()))
            .withSideInputs(Arrays.asList("input1"))
            .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table sideInputTable = descriptor.getTable(sideInputTableDescriptor);

    input2Stream
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .join(sideInputTable, mock(StreamTableJoinFunction.class))
        .sendTo(joinOutput);
  }, config);
}
 
Example #5
Source File: RepartitionExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that repartitions page-view events by member id, counts them per key in
 * 5-minute tumbling windows, and sends one {@code MyStreamOutput} per window pane to the
 * "pageViewEventPerMember" stream, keyed by the window key.
 *
 * @param appDescriptor descriptor on which the default system and the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, MyStreamOutput>> perMemberOutput =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(MyStreamOutput.class)));

  appDescriptor.withDefaultSystem(tracking);
  MessageStream<PageViewEvent> events = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, MyStreamOutput>> perMemberStream = appDescriptor.getOutputStream(perMemberOutput);

  events
      .partitionBy(
          event -> event.getMemberId(),
          event -> event,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)),
          "partitionBy")
      // Fold starts at 0 and adds 1 per message, i.e. a per-key count.
      .window(
          Windows.keyedTumblingWindow(
              KV::getKey, Duration.ofMinutes(5), () -> 0, (msg, count) -> count + 1, null, null),
          "window")
      .map(pane -> KV.of(pane.getKey().getKey(), new MyStreamOutput(pane)))
      .sendTo(perMemberStream);
}
 
Example #6
Source File: KeyValueStoreExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that repartitions page-view events by member id, runs the re-partitioned
 * values through {@code MyStatsCounter}, and sends each resulting stats object to the
 * "pageViewEventPerMember" stream keyed by its {@code memberId}.
 *
 * @param appDescriptor descriptor on which the default system and the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> statsOutput =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  appDescriptor.withDefaultSystem(tracking);
  MessageStream<PageViewEvent> events = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, StatsOutput>> statsStream = appDescriptor.getOutputStream(statsOutput);

  events
      .partitionBy(
          event -> event.getMemberId(),
          event -> event,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)),
          "partitionBy")
      // Drop the partitioning key again; the counter works on the bare events.
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(memberStats -> KV.of(memberStats.memberId, memberStats))
      .sendTo(statsStream);
}
 
Example #7
Source File: PageViewCounterExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that counts page-view events per member id in 10-second keyed tumbling
 * windows and sends a {@code PageViewCount} per window pane to the "pageViewEventPerMember"
 * stream. The window additionally carries a repeating count-of-5 early trigger and uses the
 * discarding accumulation mode.
 *
 * @param appDescriptor descriptor on which the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      tracking.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countOutput =
      tracking.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  MessageStream<PageViewEvent> events = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, PageViewCount>> countStream = appDescriptor.getOutputStream(countOutput);

  // Fold starts at 0 and adds 1 per message, i.e. a per-key count.
  SupplierFunction<Integer> zero = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> increment = (event, count) -> count + 1;

  events
      .window(
          Windows.keyedTumblingWindow(
                  PageViewEvent::getMemberId, Duration.ofSeconds(10), zero, increment, null, null)
              .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
              .setAccumulationMode(AccumulationMode.DISCARDING),
          "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(countStream);
}
 
Example #8
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the following stream-stream join which is invalid due to partition count disagreement
 * between the 2 input streams.
 *
 * <pre>
 *   input1 (64) --
 *                 |
 *                join -> output1 (8)
 *                 |
 *   input3 (32) --
 * </pre>
 *
 * @return the application descriptor holding the graph sketched above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamStreamJoin() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> leftStream = descriptor.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> rightStream = descriptor.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = descriptor.getOutputStream(output1Descriptor);

    leftStream
        .join(
            rightStream,
            mock(JoinFunction.class),
            mock(Serde.class),
            mock(Serde.class),
            mock(Serde.class),
            Duration.ofHours(2),
            "j1")
        .sendTo(joinOutput);
  }, config);
}
 
Example #9
Source File: WindowExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that counts page-view events in 10-minute tumbling windows and sends each
 * window pane's count to the "pageViewEventPerMember" stream.
 *
 * <p>In addition to the regular end-of-window result, early results are requested whenever
 * 30000 messages have been collected or no new message has arrived for one minute.
 *
 * @param appDescriptor descriptor on which the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<Integer> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde());

  SupplierFunction<Integer> initialValue = () -> 0;
  // Tolerates a null accumulator by treating it as "no messages counted yet".
  FoldLeftFunction<PageViewEvent, Integer> counter = (m, c) -> c == null ? 1 : c + 1;
  MessageStream<PageViewEvent> inputStream = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<Integer> outputStream = appDescriptor.getOutputStream(outputStreamDescriptor);

  // Create a tumbling window that outputs the number of messages collected every 10 minutes.
  // Also emit early results if either the number of messages collected reaches 30000, or if no
  // new messages arrive for 1 minute. These conditions are registered as the EARLY trigger;
  // registering them as a late trigger would not yield the early results described here.
  inputStream
      .window(Windows.tumblingWindow(Duration.ofMinutes(10), initialValue, counter, new IntegerSerde())
          .setEarlyTrigger(Triggers.any(Triggers.count(30000),
              Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))), "window")
      .map(WindowPane::getMessage)
      .sendTo(outputStream);
}
 
Example #10
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a simple graph of partitionBy and map.
 *
 * <pre>
 * input1 -> partitionBy -> map -> output1
 * </pre>
 *
 * @return the application descriptor holding the graph sketched above
 */
private StreamApplicationDescriptorImpl createSimpleGraph() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> source = descriptor.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> sink = descriptor.getOutputStream(output1Descriptor);

    source
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .map(pair -> pair)
        .sendTo(sink);
  }, config);
}
 
Example #11
Source File: FilterExample.java    From samza-hello-samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that copies page views from the input stream to the output stream, dropping
 * every record whose {@code userId} equals {@code INVALID_USER_ID}.
 *
 * @param appDescriptor descriptor on which the default system and the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka =
      new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
          .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Input and output share one serde: String key, JSON-encoded PageView value.
  KVSerde<String, PageView> pageViewSerde =
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<KV<String, PageView>> sourceDescriptor =
      kafka.getInputDescriptor(INPUT_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageView>> sinkDescriptor =
      kafka.getOutputDescriptor(OUTPUT_STREAM_ID, pageViewSerde);

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<KV<String, PageView>> allPageViews = appDescriptor.getInputStream(sourceDescriptor);
  OutputStream<KV<String, PageView>> validPageViews = appDescriptor.getOutputStream(sinkDescriptor);

  allPageViews
      .filter(entry -> !INVALID_USER_ID.equals(entry.value.userId))
      .sendTo(validPageViews);
}
 
Example #12
Source File: AsyncApplicationExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that enriches ad-click events asynchronously via
 * {@code AsyncApplicationExample::enrichAdClickEvent} and sends each enriched event to the
 * "enrichedAdClickEvent" stream, keyed by the event's country.
 *
 * @param appDescriptor descriptor on which the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<AdClickEvent> adClickInput =
      tracking.getInputDescriptor("adClickEvent", new JsonSerdeV2<>(AdClickEvent.class));
  KafkaOutputDescriptor<KV<String, EnrichedAdClickEvent>> enrichedOutput =
      tracking.getOutputDescriptor(
          "enrichedAdClickEvent",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(EnrichedAdClickEvent.class)));

  MessageStream<AdClickEvent> adClicks = appDescriptor.getInputStream(adClickInput);
  OutputStream<KV<String, EnrichedAdClickEvent>> enrichedClicks =
      appDescriptor.getOutputStream(enrichedOutput);

  adClicks
      .flatMapAsync(AsyncApplicationExample::enrichAdClickEvent)
      .map(enriched -> KV.of(enriched.getCountry(), enriched))
      .sendTo(enrichedClicks);
}
 
Example #13
Source File: SessionWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that drops page views whose user id equals {@code FILTER_KEY}, groups the
 * rest per user id with a keyed session window built with a 3-second duration, and emits
 * (user id, number of messages in the pane) pairs to the output topic.
 *
 * @param appDescriptor descriptor on which the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> sourceDescriptor = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);
  KafkaOutputDescriptor<KV<String, Integer>> sinkDescriptor =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

  MessageStream<PageView> views = appDescriptor.getInputStream(sourceDescriptor);
  OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(sinkDescriptor);

  views
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(
          Windows.keyedSessionWindow(
              PageView::getUserId,
              Duration.ofSeconds(3),
              new StringSerde(),
              new JsonSerdeV2<>(PageView.class)),
          "sessionWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(counts);
}
 
Example #14
Source File: TumblingWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a job that drops page views whose user id equals {@code FILTER_KEY}, groups the
 * rest per user id in 3-second keyed tumbling windows, and emits
 * (user id, number of messages in the pane) pairs to the output topic.
 *
 * @param appDescriptor descriptor on which the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> sourceDescriptor = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);
  KafkaOutputDescriptor<KV<String, Integer>> sinkDescriptor =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

  MessageStream<PageView> views = appDescriptor.getInputStream(sourceDescriptor);
  OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(sinkDescriptor);

  views
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(
          Windows.keyedTumblingWindow(
              PageView::getUserId,
              Duration.ofSeconds(3),
              new StringSerde(),
              new JsonSerdeV2<>(PageView.class)),
          "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(counts);
}
 
Example #15
Source File: QueryTranslator.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Routes the translated output of {@code node} to the sink configured for {@code sinkStream}.
 *
 * <p>If the sink config carries a table descriptor, the mapped output is sent to that table.
 * Otherwise it is sent to an output stream that is created once per sink source and cached in
 * {@code outputMsgStreams}; when system-event processing is enabled, the system messages of
 * every registered input stream are forwarded to the same output stream as well.
 *
 * @param queryLogicalId passed through to {@code OutputMapFunction}
 * @param logicalOpId passed through to {@code OutputMapFunction}
 * @param sinkStream sink source name used to look up the sink config
 * @param appDesc descriptor used to obtain the output stream or table
 * @param translatorContext context holding the message stream registered for {@code node}
 * @param node relational node whose message stream is being sent out
 * @param queryId passed through to {@code OutputMapFunction}
 * @throws SamzaException if a table descriptor is configured but no table is returned for it
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream,
    StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkIoConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> mappedOutput =
      relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));

  Optional<TableDescriptor> sinkTableDescriptor = sinkIoConfig.getTableDescriptor();
  if (sinkTableDescriptor.isPresent()) {
    // Table sink: no output stream and no system-event forwarding.
    Table sinkTable = appDesc.getTable(sinkTableDescriptor.get());
    if (sinkTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkIoConfig.getSource());
    }
    mappedOutput.sendTo(sinkTable);
    return;
  }

  // Stream sink: reuse the system descriptor and the output stream if they already exist.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  String sinkSystemName = sinkIoConfig.getSystemName();
  DelegatingSystemDescriptor sinkSystem =
      systemDescriptors.computeIfAbsent(sinkSystemName, DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> sinkDescriptor =
      sinkSystem.getOutputDescriptor(sinkIoConfig.getStreamId(), passThroughSerde);
  OutputStream sinkOutput =
      outputMsgStreams.computeIfAbsent(sinkIoConfig.getSource(), unused -> appDesc.getOutputStream(sinkDescriptor));
  mappedOutput.sendTo(sinkOutput);

  // Process system events only if the output is a stream.
  if (!sqlConfig.isProcessSystemEvents()) {
    return;
  }
  for (MessageStream<SamzaSqlInputMessage> sourceStream : inputMsgStreams.values()) {
    MessageStream<KV<Object, Object>> systemEvents =
        sourceStream
            .filter(inputMessage -> inputMessage.getMetadata().isSystemMessage())
            .map(SamzaSqlInputMessage::getKeyAndMessageKV);
    systemEvents.sendTo(sinkOutput);
  }
}
 
Example #16
Source File: TestStreamApplication.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a job that maps every configured input topic through a {@code TestMapFunction}
 * and sends the results of all of them to the single output topic.
 *
 * @param appDescriptor descriptor on which the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(systemName);
  KafkaOutputDescriptor<String> sinkDescriptor = kafka.getOutputDescriptor(outputTopic, new StringSerde());
  OutputStream<String> sink = appDescriptor.getOutputStream(sinkDescriptor);

  // Each topic gets its own input stream and map operator; all of them share the sink.
  for (String topic : inputTopics) {
    KafkaInputDescriptor<String> sourceDescriptor = kafka.getInputDescriptor(topic, new NoOpSerde<>());
    MessageStream<String> source = appDescriptor.getInputStream(sourceDescriptor);
    source
        .map(new TestMapFunction(appName, processorName))
        .sendTo(sink);
  }
}
 
Example #17
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a two-part job around the "profile-view-store" RocksDB table.
 *
 * <p>Part one re-keys incoming profiles by member id, writes them to the table and logs each
 * insert. Part two repartitions page views by member id, joins them against the table, sends
 * the enriched page views to the "EnrichedPageView" stream and additionally emits each page
 * key to the "JoinPageKeys" stream of the "test" system.
 *
 * @param appDescriptor descriptor on which the table and the streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Table<KV<Integer, TestTableData.Profile>> profileTable =
      appDescriptor.getTable(
          new RocksDbTableDescriptor<Integer, TestTableData.Profile>(
              "profile-view-store",
              KVSerde.of(new IntegerSerde(), new TestTableData.ProfileJsonSerde())));

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("test");

  KafkaInputDescriptor<KV<String, TestTableData.Profile>> profileInput =
      kafka.getInputDescriptor("Profile", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));
  KafkaInputDescriptor<KV<String, TestTableData.PageView>> pageViewInput =
      kafka.getInputDescriptor("PageView", KVSerde.of(new StringSerde(), new JsonSerdeV2<>()));
  KafkaOutputDescriptor<TestTableData.EnrichedPageView> enrichedOutput =
      kafka.getOutputDescriptor("EnrichedPageView", new JsonSerdeV2<>());

  // Part one: populate the table from the profile stream.
  appDescriptor.getInputStream(profileInput)
      .map(entry -> new KV(entry.getValue().getMemberId(), entry.getValue()))
      .sendTo(profileTable)
      .sink((stored, collector, coordinator) ->
          LOG.info("Inserted Profile with Key: {} in profile-view-store", stored.getKey()));

  // Part two: join page views against the table and fan the result out.
  OutputStream<TestTableData.EnrichedPageView> enrichedStream = appDescriptor.getOutputStream(enrichedOutput);
  appDescriptor.getInputStream(pageViewInput)
      .partitionBy(
          view -> view.getValue().getMemberId(),
          view -> view.getValue(),
          KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(TestTableData.PageView.class)),
          "p1")
      .join(profileTable, new PageViewToProfileJoinFunction())
      .sendTo(enrichedStream)
      .map(TestTableData.EnrichedPageView::getPageKey)
      .sink((pageKey, collector, coordinator) ->
          collector.send(
              new OutgoingMessageEnvelope(new SystemStream("test", "JoinPageKeys"), null, null, pageKey)));
}
 
Example #18
Source File: ExecutionPlannerTestBase.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a stream application that repartitions input1, strips the keys from both inputs,
 * joins the resulting value streams with a mocked join function (1-hour duration, id "j1"),
 * and sends the join result to the output stream.
 *
 * @return the stream application described above
 */
StreamApplication getRepartitionJoinStreamApplication() {
  return descriptor -> {
    MessageStream<KV<String, Object>> leftInput = descriptor.getInputStream(input1Descriptor);
    MessageStream<KV<String, Object>> rightInput = descriptor.getInputStream(input2Descriptor);
    OutputStream<KV<String, Object>> joinOutput = descriptor.getOutputStream(outputDescriptor);
    JoinFunction<String, Object, Object, KV<String, Object>> joinFn = mock(JoinFunction.class);

    leftInput
        .partitionBy(KV::getKey, KV::getValue, defaultSerde, "p1")
        .map(entry -> entry.value)
        .join(
            rightInput.map(entry -> entry.value),
            joinFn,
            new StringSerde(),
            new JsonSerdeV2<>(Object.class),
            new JsonSerdeV2<>(Object.class),
            Duration.ofHours(1),
            "j1")
        .sendTo(joinOutput);
  };
}
 
Example #19
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Example stream-table join that is invalid due to disagreement in partition count
 * between the 2 input streams.
 *
 * <pre>
 *    input1 (64) -> send-to-table t
 *
 *                   join-table t -> output1 (8)
 *                         |
 *    input2 (16) ---------
 * </pre>
 *
 * @return the application descriptor holding the graph sketched above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoin() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input1Stream = descriptor.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> input2Stream = descriptor.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = descriptor.getOutputStream(output1Descriptor);

    TableDescriptor localTableDescriptor =
        new TestLocalTableDescriptor.MockLocalTableDescriptor(
            "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table localTable = descriptor.getTable(localTableDescriptor);

    // input1 both feeds the table and is joined against it.
    input1Stream.sendTo(localTable);

    input1Stream
        .join(localTable, mock(StreamTableJoinFunction.class))
        .join(
            input2Stream,
            mock(JoinFunction.class),
            mock(Serde.class),
            mock(Serde.class),
            mock(Serde.class),
            Duration.ofHours(1),
            "j2")
        .sendTo(joinOutput);
  }, config);
}
 
Example #20
Source File: RemoteTableJoinExample.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a job that looks up each incoming stock symbol in a cached remote table and sends
 * the join result to the output stream.
 *
 * <p>The remote table is configured with a read rate limit of 10 and
 * {@code StockPriceReadFunction} as its read function; the caching table wrapped around it
 * uses a read TTL of 5 seconds.
 *
 * @param appDescriptor descriptor on which the default system, streams and table are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka =
      new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
          .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> symbolInput =
      kafka.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());
  KafkaOutputDescriptor<StockPrice> priceOutput =
      kafka.getOutputDescriptor(OUTPUT_STREAM_ID, new JsonSerdeV2<>(StockPrice.class));

  appDescriptor.withDefaultSystem(kafka);
  MessageStream<String> symbols = appDescriptor.getInputStream(symbolInput);
  OutputStream<StockPrice> prices = appDescriptor.getOutputStream(priceOutput);

  RemoteTableDescriptor<String, Double> remoteTable =
      new RemoteTableDescriptor("remote-table")
          .withReadRateLimit(10)
          .withReadFunction(new StockPriceReadFunction());
  CachingTableDescriptor<String, Double> cachingTable =
      new CachingTableDescriptor<>("cached-remote-table", remoteTable)
          .withReadTtl(Duration.ofSeconds(5));
  Table<KV<String, Double>> priceTable = appDescriptor.getTable(cachingTable);

  symbols
      // The join operates on key-value pairs; the value is unused, hence null.
      .map(ticker -> new KV<String, Void>(ticker, null))
      .join(priceTable, new JoinFn())
      .sendTo(prices);
}
 
Example #21
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Example stream-table join app. Expected partition counts of intermediate streams introduced
 * by partitionBy operations are enclosed in quotes.
 *
 * <pre>
 *    input2 (16) -> partitionBy ("32") -> send-to-table t
 *
 *                                      join-table t -----
 *                                       |                |
 *    input1 (64) -> partitionBy ("32") _|                |
 *                                                       join -> output1 (8)
 *                                                        |
 *                                      input3 (32) ------
 * </pre>
 *
 * @return the application descriptor holding the graph sketched above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input1Stream = descriptor.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> input2Stream = descriptor.getInputStream(input2Descriptor);
    MessageStream<KV<Object, Object>> input3Stream = descriptor.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = descriptor.getOutputStream(output1Descriptor);

    TableDescriptor localTableDescriptor =
        new TestLocalTableDescriptor.MockLocalTableDescriptor(
            "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table localTable = descriptor.getTable(localTableDescriptor);

    // input2 is repartitioned ("p1") and written to the table.
    input2Stream
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .sendTo(localTable);

    // input1 is repartitioned ("p2"), joined with the table, then joined with input3.
    input1Stream
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p2")
        .join(localTable, mock(StreamTableJoinFunction.class))
        .join(
            input3Stream,
            mock(JoinFunction.class),
            mock(Serde.class),
            mock(Serde.class),
            mock(Serde.class),
            Duration.ofHours(1),
            "j2")
        .sendTo(joinOutput);
  }, config);
}
 
Example #22
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a graph that combines windows and joins on three inputs:
 * input1 is mapped, input2 is repartitioned ("p1") then filtered, and input3 is filtered,
 * repartitioned ("p2") then mapped. Keyed tumbling windows "w1" (8 ms) and "w2" (16 ms) hang
 * off stream 1 and stream 2; join "j1" (1600 ms) of streams 1 and 2 feeds output1, while joins
 * "j2" (100 ms) and "j3" (252 ms) of streams 3 and 2 both feed output2.
 *
 * @return the application descriptor holding the graph described above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithJoinAndWindow() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> stream1 =
        descriptor.getInputStream(input1Descriptor)
            .map(msg -> msg);
    MessageStream<KV<Object, Object>> stream2 =
        descriptor.getInputStream(input2Descriptor)
            .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
            .filter(msg -> true);
    MessageStream<KV<Object, Object>> stream3 =
        descriptor.getInputStream(input3Descriptor)
            .filter(msg -> true)
            .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p2")
            .map(msg -> msg);
    OutputStream<KV<Object, Object>> firstOutput = descriptor.getOutputStream(output1Descriptor);
    OutputStream<KV<Object, Object>> secondOutput = descriptor.getOutputStream(output2Descriptor);

    // Window branches: their results are not consumed further.
    stream1
        .map(msg -> msg)
        .filter(msg -> true)
        .window(
            Windows.keyedTumblingWindow(
                msg -> msg,
                Duration.ofMillis(8),
                (Serde<KV<Object, Object>>) mock(Serde.class),
                (Serde<KV<Object, Object>>) mock(Serde.class)),
            "w1");

    stream2
        .map(msg -> msg)
        .filter(msg -> true)
        .window(
            Windows.keyedTumblingWindow(
                msg -> msg,
                Duration.ofMillis(16),
                (Serde<KV<Object, Object>>) mock(Serde.class),
                (Serde<KV<Object, Object>>) mock(Serde.class)),
            "w2");

    // Join branches.
    stream1
        .join(stream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofMillis(1600), "j1")
        .sendTo(firstOutput);
    stream3
        .join(stream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofMillis(100), "j2")
        .sendTo(secondOutput);
    stream3
        .join(stream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofMillis(252), "j3")
        .sendTo(secondOutput);
  }, config);
}
 
Example #23
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * The graph looks like the following. Number of partitions in parentheses. Quotes indicate
 * expected value.
 *
 * <pre>
 *                               input1 (64) -> map -> join -> output1 (8)
 *                                                       |
 *          input2 (16) -> partitionBy ("64") -> filter -|
 *                                                       |
 * input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
 * </pre>
 *
 * @return the application descriptor holding the graph sketched above
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamStreamJoin() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> stream1 =
        descriptor.getInputStream(input1Descriptor).map(msg -> msg);
    MessageStream<KV<Object, Object>> stream2 =
        descriptor.getInputStream(input2Descriptor)
            .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
            .filter(msg -> true);
    MessageStream<KV<Object, Object>> stream3 =
        descriptor.getInputStream(input3Descriptor)
            .filter(msg -> true)
            .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p2")
            .map(msg -> msg);
    OutputStream<KV<Object, Object>> firstOutput = descriptor.getOutputStream(output1Descriptor);
    OutputStream<KV<Object, Object>> secondOutput = descriptor.getOutputStream(output2Descriptor);

    // stream2 takes part in both joins.
    stream1
        .join(stream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(2), "j1")
        .sendTo(firstOutput);
    stream3
        .join(stream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(1), "j2")
        .sendTo(secondOutput);
  }, config);
}
 
Example #24
Source File: StreamTableJoinExample.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a stream-table join job: profiles are keyed by user id and written to the
 * "profile-table" RocksDB table, while page views are repartitioned by user id, joined
 * against that table with {@code JoinFn}, and sent to the output topic.
 *
 * @param appDescriptor descriptor on which the default system, streams and table are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> enrichedSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafka =
      new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
          .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profileInput =
      kafka.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewInput =
      kafka.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> enrichedOutput =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, enrichedSerde);

  RocksDbTableDescriptor<String, Profile> profileTableSpec =
      new RocksDbTableDescriptor<String, Profile>(
          "profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<Profile> profiles = appDescriptor.getInputStream(profileInput);
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<EnrichedPageView> enrichedPageViews = appDescriptor.getOutputStream(enrichedOutput);
  Table<KV<String, Profile>> profilesByUser = appDescriptor.getTable(profileTableSpec);

  // Populate the table: one entry per profile, keyed by user id.
  profiles
      .map(p -> KV.of(p.userId, p))
      .sendTo(profilesByUser);

  // Repartition page views by the same key before joining against the table.
  pageViews
      .partitionBy(view -> view.userId, view -> view, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profilesByUser, new JoinFn())
      .sendTo(enrichedPageViews);
}
 
Example #25
Source File: StreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the given output descriptor and creates the output stream for its stream id.
 *
 * <p>The key and value serdes come from {@code getOrCreateStreamSerdes}; the stream is
 * marked as keyed when the descriptor's serde is a {@link KVSerde}. The new stream is stored
 * in {@code outputStreams} under the stream id and the stored entry is returned.
 *
 * @param outputDescriptor descriptor of the stream to write to
 * @param <M> type of the messages sent to the stream
 * @return the output stream registered for the descriptor's stream id
 */
@Override
public <M> OutputStream<M> getOutputStream(OutputDescriptor<M, ?> outputDescriptor) {
  addOutputDescriptor(outputDescriptor);

  final String id = outputDescriptor.getStreamId();
  final Serde descriptorSerde = outputDescriptor.getSerde();
  final KV<Serde, Serde> serdePair = getOrCreateStreamSerdes(id, descriptorSerde);

  outputStreams.put(
      id,
      new OutputStreamImpl<>(id, serdePair.getKey(), serdePair.getValue(), descriptorSerde instanceof KVSerde));
  return outputStreams.get(id);
}
 
Example #26
Source File: AzureApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a pass-through pipeline between two Event Hubs entities: every message read
 * from the input is printed to standard output (key and value) and forwarded unchanged to
 * the output.
 *
 * @param appDescriptor the application descriptor; its config supplies the SAS key name and token
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // System and payload serde shared by the input and output descriptors.
  EventHubsSystemDescriptor eventHubs = new EventHubsSystemDescriptor("eventhubs");
  StringSerde payloadSerde = new StringSerde();

  // Input descriptor, authenticated with the SAS key name/token from the config.
  EventHubsInputDescriptor<KV<String, String>> source =
      eventHubs
          .getInputDescriptor(INPUT_STREAM_ID, EVENTHUBS_NAMESPACE, EVENTHUBS_INPUT_ENTITY, payloadSerde)
          .withSasKeyName(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_NAME_CONFIG))
          .withSasKey(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_TOKEN_CONFIG));

  // Output descriptor, authenticated the same way.
  EventHubsOutputDescriptor<KV<String, String>> sink =
      eventHubs
          .getOutputDescriptor(OUTPUT_STREAM_ID, EVENTHUBS_NAMESPACE, EVENTHUBS_OUTPUT_ENTITY, payloadSerde)
          .withSasKeyName(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_NAME_CONFIG))
          .withSasKey(appDescriptor.getConfig().get(EVENTHUBS_SAS_KEY_TOKEN_CONFIG));

  MessageStream<KV<String, String>> incoming = appDescriptor.getInputStream(source);
  OutputStream<KV<String, String>> outgoing = appDescriptor.getOutputStream(sink);

  // Log each message and pass it through untouched.
  incoming
      .map(kv -> {
        System.out.println("Sending: ");
        System.out.println("Received Key: " + kv.getKey());
        System.out.println("Received Message: " + kv.getValue());
        return kv;
      })
      .sendTo(outgoing);
}
 
Example #27
Source File: AzureBlobApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a pipeline that reads page views from Kafka, converts each one to a
 * {@code PageViewAvroRecord}, and sends the records to the Azure Blob Storage system.
 *
 * @param appDescriptor the application descriptor to register systems and streams on
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Kafka is both the input system and the job's default system.
  KafkaSystemDescriptor kafka =
      new KafkaSystemDescriptor("kafka")
          .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);
  KafkaInputDescriptor<PageView> pageViewsIn =
      kafka.getInputDescriptor(INPUT_PAGEVIEW_STREAM_ID, new JsonSerdeV2<>(PageView.class));

  // Azure Blob Storage is described generically through its system factory class name.
  GenericSystemDescriptor azureBlob =
      new GenericSystemDescriptor(OUTPUT_SYSTEM, "org.apache.samza.system.azureblob.AzureBlobSystemFactory");
  GenericOutputDescriptor<PageViewAvroRecord> azureBlobOutputDescriptor =
      azureBlob.getOutputDescriptor(OUTPUT_STREAM, new NoOpSerde<>());

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewsIn);
  OutputStream<PageViewAvroRecord> avroRecords = appDescriptor.getOutputStream(azureBlobOutputDescriptor);

  // Log each page view, convert it to its Avro representation and send it on.
  pageViews
      .map(pageView -> {
        LOG.info("Sending: Received PageViewEvent with pageId: " + pageView.pageId);
        return PageViewAvroRecord.buildPageViewRecord(pageView);
      })
      .sendTo(avroRecords);
}
 
Example #28
Source File: CouchbaseTableExample.java    From samza-hello-samza with Apache License 2.0 4 votes vote down vote up
/**
 * Describes a pipeline that splits input lines into words, drops empty words, runs each
 * word through {@code MyCountFunction}, prefixes the result with the current time, and
 * sends it to the output stream. A remote table named "couchbase-table" is registered with
 * a write function, a fixed-backoff retry policy and a write rate limit.
 *
 * @param app the application descriptor to register the system, streams and table on
 */
@Override
public void describe(StreamApplicationDescriptor app) {
  KafkaSystemDescriptor kafka =
      new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
          .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<String> wordsIn = kafka.getInputDescriptor(INPUT_STREAM_ID, new StringSerde());
  KafkaOutputDescriptor<String> countsOut = kafka.getOutputDescriptor(OUTPUT_STREAM_ID, new StringSerde());

  // Write path to Couchbase: 5 second timeout per operation.
  MyCouchbaseTableWriteFunction couchbaseWriter =
      new MyCouchbaseTableWriteFunction(BUCKET_NAME, CLUSTER_NODES)
          .withBootstrapCarrierDirectPort(COUCHBASE_PORT)
          .withUsernameAndPassword(BUCKET_NAME, BUCKET_PASSWORD)
          .withTimeout(Duration.ofSeconds(5));

  // Retry failed writes with a fixed 1 second backoff, giving up after 3 attempts.
  TableRetryPolicy writeRetries =
      new TableRetryPolicy()
          .withFixedBackoff(Duration.ofSeconds(1))
          .withStopAfterAttempts(3);

  // The table is write-only here: reads use a no-op function with rate limiting disabled.
  RemoteTableDescriptor couchbaseTable =
      new RemoteTableDescriptor("couchbase-table")
          .withReadFunction(new NoOpTableReadFunction())
          .withReadRateLimiterDisabled()
          .withWriteFunction(couchbaseWriter)
          .withWriteRetryPolicy(writeRetries)
          .withWriteRateLimit(4);

  app.withDefaultSystem(kafka);
  MessageStream<String> lines = app.getInputStream(wordsIn);
  OutputStream<String> counts = app.getOutputStream(countsOut);
  // Registers the table with the application; the returned handle is not used here.
  app.getTable(couchbaseTable);

  lines
      .flatMap(line -> Arrays.asList(line.split(" ")))
      .filter(word -> word != null && !word.isEmpty())
      .map(new MyCountFunction())
      .map(countString -> currentTime() + " " + countString)
      .sendTo(counts);
}
 
Example #29
Source File: WikipediaApplication.java    From samza-hello-samza with Apache License 2.0 4 votes vote down vote up
/**
 * Describes a pipeline that merges three Wikipedia feeds (wikipedia, wiktionary, wikinews),
 * parses each event, aggregates statistics over a tumbling window, formats the result and
 * sends it to the "wikipedia-stats" Kafka stream.
 *
 * <p>The window is 10 milliseconds when the config contains the key "deploy.test" and
 * 10 seconds otherwise.
 *
 * @param appDescriptor the application descriptor to register systems and streams on
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  final Duration windowDuration;
  if (appDescriptor.getConfig().containsKey("deploy.test")) {
    windowDuration = Duration.ofMillis(10);
  } else {
    windowDuration = Duration.ofSeconds(10);
  }

  // Source system for the Wikipedia IRC feeds, with one input descriptor per channel.
  WikipediaSystemDescriptor wikipediaSystem = new WikipediaSystemDescriptor("irc.wikimedia.org", 6667);
  WikipediaInputDescriptor wikipediaIn =
      wikipediaSystem.getInputDescriptor("en-wikipedia").withChannel(WIKIPEDIA_CHANNEL);
  WikipediaInputDescriptor wiktionaryIn =
      wikipediaSystem.getInputDescriptor("en-wiktionary").withChannel(WIKTIONARY_CHANNEL);
  WikipediaInputDescriptor wikiNewsIn =
      wikipediaSystem.getInputDescriptor("en-wikinews").withChannel(WIKINEWS_CHANNEL);

  // Kafka carries the output stream.
  KafkaSystemDescriptor kafka =
      new KafkaSystemDescriptor("kafka")
          .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);
  KafkaOutputDescriptor<WikipediaStatsOutput> statsOut =
      kafka.getOutputDescriptor("wikipedia-stats", new JsonSerdeV2<>(WikipediaStatsOutput.class));

  // Set the default system descriptor to Kafka, so that it is used for all
  // internal resources, e.g., kafka topic for checkpointing, coordinator stream.
  appDescriptor.withDefaultSystem(kafka);

  MessageStream<WikipediaFeedEvent> wikipediaFeed = appDescriptor.getInputStream(wikipediaIn);
  MessageStream<WikipediaFeedEvent> wiktionaryFeed = appDescriptor.getInputStream(wiktionaryIn);
  MessageStream<WikipediaFeedEvent> wikiNewsFeed = appDescriptor.getInputStream(wikiNewsIn);
  OutputStream<WikipediaStatsOutput> statsStream = appDescriptor.getOutputStream(statsOut);

  // Merge the three feeds into a single stream of events.
  MessageStream<WikipediaFeedEvent> mergedFeed =
      MessageStream.mergeAll(ImmutableList.of(wikipediaFeed, wiktionaryFeed, wikiNewsFeed));

  // Parse, update stats per window, prepare output, and send.
  mergedFeed
      .map(WikipediaParser::parseEvent)
      .window(
          Windows.tumblingWindow(
              windowDuration, WikipediaStats::new, new WikipediaStatsAggregator(), WikipediaStats.serde()),
          "statsWindow")
      .map(this::formatOutput)
      .sendTo(statsStream);
}
 
Example #30
Source File: TestOperatorImplGraph.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that {@code OperatorImplGraph.getIntermediateToInputStreamsMap} maps each
 * intermediate (partitionBy) stream to the input streams that feed it.
 *
 * <p>Graph under test:
 * <ul>
 *   <li>input1 -> map and input2 -> filter are joined ("j1"), re-partitioned ("p2") and sent
 *       to output1, so the "p2" intermediate stream is expected to map to input1 and input2;</li>
 *   <li>input3 -> filter -> partitionBy ("p1") -> map is joined ("j2") with the input2 side
 *       and sent to output2, so the "p1" intermediate stream is expected to map to input3
 *       only.</li>
 * </ul>
 */
@Test
public void testGetOutputToInputStreams() {
  String inputStreamId1 = "input1";
  String inputStreamId2 = "input2";
  String inputStreamId3 = "input3";
  String inputSystem = "input-system";

  String outputStreamId1 = "output1";
  String outputStreamId2 = "output2";
  String outputSystem = "output-system";

  // Intermediate stream ids as asserted below; they embed the job name ("test-app") and the partitionBy ids.
  String intStreamId1 = "test-app-1-partition_by-p1";
  String intStreamId2 = "test-app-1-partition_by-p2";
  String intSystem = "test-system";

  HashMap<String, String> configs = new HashMap<>();
  configs.put(JobConfig.JOB_NAME, "test-app");
  // The default system is the one the intermediate streams are looked up under in the assertions.
  configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
  Config config = new MapConfig(configs);
  when(this.context.getJobContext().getConfig()).thenReturn(config);

  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
    GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
    GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
    GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
    GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
    GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));
    MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
    MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
    MessageStream messageStream3 =
        appDesc.getInputStream(inputDescriptor3)
            .filter(m -> true)
            .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1")
            .map(m -> m);
    OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
    OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);

    // input1 + input2 -> join "j1" -> partitionBy "p2" -> output1
    messageStream1
        .join(messageStream2, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1")
        .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2")
        .sendTo(outputStream1);
    // (input3 via "p1") + input2 -> join "j2" -> output2
    messageStream3
        .join(messageStream2, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2")
        .sendTo(outputStream2);
  }, config);

  Multimap<SystemStream, SystemStream> outputToInput =
      OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));

  // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
  Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
  assertEquals(2, inputs.size());
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));

  inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
  assertEquals(1, inputs.size());
  assertEquals(new SystemStream(inputSystem, inputStreamId3), inputs.iterator().next());
}