org.apache.samza.operators.KV Java Examples

The following examples show how to use org.apache.samza.operators.KV. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestKinesisInputDescriptor.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a Kinesis input descriptor with a region, an access key, a secret key
 * and a single KCL property, then checks that {@code toConfig()} produces exactly
 * five entries holding those values.
 */
@Test
public void testConfigGeneration() {
  final String systemName = "kinesis";
  final String streamName = "Seine";

  Map<String, String> kclProperties = new HashMap<>();
  kclProperties.put("key1", "value1");

  KinesisSystemDescriptor systemDescriptor = new KinesisSystemDescriptor(systemName);
  KinesisInputDescriptor<KV<String, byte[]>> inputDescriptor =
      systemDescriptor.getInputDescriptor(streamName, new NoOpSerde<byte[]>())
          .withRegion("Paris")
          .withAccessKey("accessKey")
          .withSecretKey("secretKey")
          .withKCLConfig(kclProperties);

  Map<String, String> actualConfig = inputDescriptor.toConfig();
  Assert.assertEquals(5, actualConfig.size());

  // The stream-to-system mapping uses a fixed key derived from the stream name.
  Assert.assertEquals(systemName, actualConfig.get("streams.Seine.samza.system"));

  // The remaining keys are produced from the KinesisConfig format templates.
  String regionKey = String.format(KinesisConfig.CONFIG_STREAM_REGION, systemName, streamName);
  Assert.assertEquals("Paris", actualConfig.get(regionKey));

  String accessKeyKey = String.format(KinesisConfig.CONFIG_STREAM_ACCESS_KEY, systemName, streamName);
  Assert.assertEquals("accessKey", actualConfig.get(accessKeyKey));

  String secretKeyKey = String.format(KinesisConfig.CONFIG_STREAM_SECRET_KEY, systemName, streamName);
  Assert.assertEquals("secretKey", actualConfig.get(secretKeyKey));

  String kclPrefix =
      String.format(KinesisConfig.CONFIG_STREAM_KINESIS_CLIENT_LIB_CONFIG, systemName, streamName);
  Assert.assertEquals("value1", actualConfig.get(kclPrefix + "key1"));
}
 
Example #2
Source File: TestOperatorSpec.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that copying a map operator spec built from a method reference yields a
 * spec that is not equal to the original, is reported as its clone, and carries
 * its own transform function and user map function.
 */
@Test
public void testStreamOperatorSpecWithMapWithFunctionReference() {
  MapFunction<KV<String, Object>, Object> mapFn = KV::getValue;
  StreamOperatorSpec<KV<String, Object>, Object> streamOperatorSpec =
      OperatorSpecs.createMapOperatorSpec(mapFn, "op0");
  // The copy has the same message types as the spec it was copied from.
  StreamOperatorSpec<KV<String, Object>, Object> cloneOperatorSpec =
      (StreamOperatorSpec<KV<String, Object>, Object>) OperatorSpecTestUtils.copyOpSpec(streamOperatorSpec);

  assertNotEquals(streamOperatorSpec, cloneOperatorSpec);
  assertTrue(streamOperatorSpec.isClone(cloneOperatorSpec));

  // The original spec must still hold the exact function it was created with.
  MapFunction userFn = (MapFunction) Whitebox.getInternalState(streamOperatorSpec, "mapFn");
  assertEquals(mapFn, userFn);

  // The clone must own distinct function instances of the expected kinds.
  assertNotEquals(streamOperatorSpec.getTransformFn(), cloneOperatorSpec.getTransformFn());
  MapFunction clonedUserFn = (MapFunction) Whitebox.getInternalState(cloneOperatorSpec, "mapFn");
  assertTrue(cloneOperatorSpec.getTransformFn() instanceof FlatMapFunction);
  assertTrue(clonedUserFn instanceof MapFunction);
  assertNotEquals(userFn, clonedUserFn);
}
 
Example #3
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an application descriptor that reads the "integers" stream of the
 * "kafka" system, groups it by key into tumbling windows and sends every window
 * result to outputSystem/outputStream.
 *
 * @param mode accumulation mode set on the window
 * @param duration duration passed to the keyed tumbling window
 * @param earlyTrigger early trigger set on the window
 * @return descriptor built from the application and the enclosing {@code config}
 * @throws IOException declared by the signature
 */
private StreamApplicationDescriptorImpl getKeyedTumblingWindowStreamGraph(AccumulationMode mode,
    Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException {

  StreamApplication app = descriptor -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput =
        kafka.getInputDescriptor("integers", intPairSerde);

    descriptor.getInputStream(integersInput)
        .window(
            Windows.keyedTumblingWindow(KV::getKey, duration, new IntegerSerde(), intPairSerde)
                .setEarlyTrigger(earlyTrigger)
                .setAccumulationMode(mode),
            "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(
                new OutgoingMessageEnvelope(new SystemStream("outputSystem", "outputStream"), pane)));
  };

  return new StreamApplicationDescriptorImpl(app, config);
}
 
Example #4
Source File: OrderShipmentJoinExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Joins the "orders" and "shipments" streams of the "tracking" Kafka system and
 * writes the joined records, keyed by order id, to "fulfilledOrders".
 *
 * @param appDescriptor descriptor on which the streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  // Inputs: both carry JSON-encoded records.
  KafkaInputDescriptor<OrderRecord> ordersIn =
      kafka.getInputDescriptor("orders", new JsonSerdeV2<>(OrderRecord.class));
  KafkaInputDescriptor<ShipmentRecord> shipmentsIn =
      kafka.getInputDescriptor("shipments", new JsonSerdeV2<>(ShipmentRecord.class));

  // Output: string key plus JSON-encoded fulfilled order.
  KafkaOutputDescriptor<KV<String, FulfilledOrderRecord>> fulfilledOut =
      kafka.getOutputDescriptor("fulfilledOrders",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(FulfilledOrderRecord.class)));

  appDescriptor.getInputStream(ordersIn)
      .join(
          appDescriptor.getInputStream(shipmentsIn),
          new MyJoinFunction(),
          new StringSerde(),
          new JsonSerdeV2<>(OrderRecord.class),
          new JsonSerdeV2<>(ShipmentRecord.class),
          Duration.ofMinutes(1),
          "join")
      .map(fulfilled -> KV.of(fulfilled.orderId, fulfilled))
      .sendTo(appDescriptor.getOutputStream(fulfilledOut));
}
 
Example #5
Source File: TestSamzaCookBookExamples.java    From samza-hello-samza with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code TumblingWindowExample} against in-memory streams fed with sample
 * page views and checks that partition 0 of the output stream holds more than
 * one message.
 */
@Test
public void testTumblingWindowExample() {
  InMemorySystemDescriptor kafkaStandIn = new InMemorySystemDescriptor("kafka");

  InMemoryInputDescriptor<KV<String, PageView>> pageViewsIn =
      kafkaStandIn.getInputDescriptor("pageview-tumbling-input", new NoOpSerde<KV<String, PageView>>());
  InMemoryOutputDescriptor<KV<String, UserPageViews>> userPageViewsOut =
      kafkaStandIn.getOutputDescriptor("pageview-tumbling-output", new NoOpSerde<KV<String, UserPageViews>>());

  List<PageView> samplePageViews = TestUtils.genSamplePageViewData();

  TestRunner
      .of(new TumblingWindowExample())
      .addInputStream(pageViewsIn, samplePageViews)
      .addOutputStream(userPageViewsOut, 1)
      .run(Duration.ofMinutes(1));

  int firstPartitionSize =
      TestRunner.consumeStream(userPageViewsOut, Duration.ofMillis(1000)).get(0).size();
  Assert.assertTrue(firstPartitionSize > 1);
}
 
Example #6
Source File: KeyValueStoreExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Repartitions page view events by member id, passes them through
 * {@code MyStatsCounter} and writes the resulting stats, keyed by member id, to
 * "pageViewEventPerMember".
 *
 * @param appDescriptor descriptor on which the streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewsIn =
      kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> statsOut =
      kafka.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  appDescriptor.withDefaultSystem(kafka);
  MessageStream<PageViewEvent> events = appDescriptor.getInputStream(pageViewsIn);
  OutputStream<KV<String, StatsOutput>> statsPerMember = appDescriptor.getOutputStream(statsOut);

  KVSerde<String, PageViewEvent> repartitionSerde =
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class));

  events
      .partitionBy(PageViewEvent::getMemberId, event -> event, repartitionSerde, "partitionBy")
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(memberStats -> KV.of(memberStats.memberId, memberStats))
      .sendTo(statsPerMember);
}
 
Example #7
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an application descriptor that reads the "integers" stream of the
 * "kafka" system, applies a non-keyed tumbling window and sends every window
 * result to outputSystem/outputStream.
 *
 * @param mode accumulation mode set on the window
 * @param duration duration passed to the tumbling window
 * @param earlyTrigger early trigger set on the window
 * @return descriptor built from the application and the enclosing {@code config}
 * @throws IOException declared by the signature
 */
private StreamApplicationDescriptorImpl getTumblingWindowStreamGraph(AccumulationMode mode,
    Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException {
  StreamApplication app = descriptor -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput =
        kafka.getInputDescriptor("integers", intPairSerde);

    descriptor.getInputStream(integersInput)
        .window(
            Windows.tumblingWindow(duration, intPairSerde)
                .setEarlyTrigger(earlyTrigger)
                .setAccumulationMode(mode),
            "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(
                new OutgoingMessageEnvelope(new SystemStream("outputSystem", "outputStream"), pane)));
  };

  return new StreamApplicationDescriptorImpl(app, config);
}
 
Example #8
Source File: TestLocalTableEndToEnd.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Declares two pipelines: profiles are keyed by member id and written to the
 * in-memory table "t1"; page views are recorded in {@code received},
 * repartitioned by member id, joined against that table, and the join results
 * are added to {@code joined}.
 *
 * @param appDesc descriptor on which the table, streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDesc) {
  Table<KV<Integer, Profile>> table = appDesc.getTable(
      new InMemoryTableDescriptor("t1", KVSerde.of(new IntegerSerde(), new ProfileJsonSerde())));
  DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test");
  GenericInputDescriptor<Profile> profileISD = ksd.getInputDescriptor("Profile", new NoOpSerde<>());
  appDesc.getInputStream(profileISD)
      // KV.of keeps the key/value types instead of producing a raw KV.
      .map(m -> KV.of(m.getMemberId(), m))
      .sendTo(table);

  GenericInputDescriptor<PageView> pageViewISD = ksd.getInputDescriptor("PageView", new NoOpSerde<>());
  appDesc.getInputStream(pageViewISD)
      .map(pv -> {
        // Record every page view that reaches the pipeline before it is repartitioned.
        received.add(pv);
        return pv;
      })
      .partitionBy(PageView::getMemberId, v -> v, KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()), "p1")
      .join(table, new PageViewToProfileJoinFunction())
      .sink((m, collector, coordinator) -> joined.add(m));
}
 
Example #9
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an application descriptor that reads the "integers" stream of the
 * "kafka" system, groups it by key into session windows and sends every window
 * result to outputSystem/outputStream.
 *
 * @param mode accumulation mode set on the window
 * @param duration duration passed to the keyed session window
 * @return descriptor built from the application and the enclosing {@code config}
 * @throws IOException declared by the signature
 */
private StreamApplicationDescriptorImpl getKeyedSessionWindowStreamGraph(AccumulationMode mode, Duration duration) throws IOException {
  StreamApplication app = descriptor -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput =
        kafka.getInputDescriptor("integers", intPairSerde);

    descriptor.getInputStream(integersInput)
        .window(
            Windows.keyedSessionWindow(KV::getKey, duration, new IntegerSerde(), intPairSerde)
                .setAccumulationMode(mode),
            "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(
                new OutgoingMessageEnvelope(new SystemStream("outputSystem", "outputStream"), pane)));
  };

  return new StreamApplicationDescriptorImpl(app, config);
}
 
Example #10
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an application descriptor that reads the "integers" stream of the
 * "kafka" system, maps it with {@code KVMapFunction}, counts messages per
 * tumbling window (initial value 0, incremented by one per message) and sends
 * every window result to outputSystem/outputStream.
 *
 * @param mode accumulation mode set on the window
 * @param timeDuration duration passed to the tumbling window
 * @param earlyTrigger early trigger set on the window
 * @return descriptor built from the application and the enclosing {@code config}
 * @throws IOException declared by the signature
 */
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode, Duration timeDuration,
      Trigger<IntegerEnvelope> earlyTrigger) throws IOException {
  StreamApplication app = descriptor -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput =
        kafka.getInputDescriptor("integers", intPairSerde);

    descriptor.getInputStream(integersInput)
        .map(new KVMapFunction())
        .window(
            Windows.<IntegerEnvelope, Integer>tumblingWindow(
                    timeDuration, () -> 0, (envelope, count) -> count + 1, new IntegerSerde())
                .setEarlyTrigger(earlyTrigger)
                .setAccumulationMode(mode),
            "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(
                new OutgoingMessageEnvelope(new SystemStream("outputSystem", "outputStream"), pane)));
  };

  return new StreamApplicationDescriptorImpl(app, config);
}
 
Example #11
Source File: AsyncStreamTaskIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code MyAsyncStreamTask} over generated multi-partition input on an
 * in-memory system and checks that each output partition contains the expected
 * integers in order.
 *
 * @throws Exception declared by the signature
 */
@Test
public void testAsyncTaskWithMultiplePartition() throws Exception {
  Map<Integer, List<KV>> inputPartitionData = new HashMap<>();
  Map<Integer, List<Integer>> expectedOutputPartitionData = new HashMap<>();
  genData(inputPartitionData, expectedOutputPartitionData);

  InMemorySystemDescriptor isd = new InMemorySystemDescriptor("async-test");

  InMemoryInputDescriptor<KV> imid = isd
      .getInputDescriptor("ints", new NoOpSerde<KV>());
  // Typed as Integer to match the expected output data instead of using a raw descriptor.
  InMemoryOutputDescriptor<Integer> imod = isd
      .getOutputDescriptor("ints-out", new NoOpSerde<Integer>());

  TestRunner
      .of(MyAsyncStreamTask.class)
      .addInputStream(imid, inputPartitionData)
      .addOutputStream(imod, 5)
      .run(Duration.ofSeconds(2));

  StreamAssert.containsInOrder(expectedOutputPartitionData, imod, Duration.ofMillis(1000));
}
 
Example #12
Source File: BroadcastExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Reads keyed page view events from "pageViewEvent" and routes them to
 * "outStream1", "outStream2" or "outStream3" when the message key is "key1",
 * "key2" or "key3" respectively. Messages with any other key, including a null
 * key, match none of the filters.
 *
 * @param appDescriptor descriptor on which the streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> serde = KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
  KafkaInputDescriptor<KV<String, PageViewEvent>> pageViewEvent =
      trackingSystem.getInputDescriptor("pageViewEvent", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream1 =
      trackingSystem.getOutputDescriptor("outStream1", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream2 =
      trackingSystem.getOutputDescriptor("outStream2", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream3 =
      trackingSystem.getOutputDescriptor("outStream3", serde);

  MessageStream<KV<String, PageViewEvent>> inputStream = appDescriptor.getInputStream(pageViewEvent);
  // Constant-first equals: a message with a null key is filtered out rather than
  // throwing a NullPointerException inside the filter.
  inputStream.filter(m -> "key1".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream1));
  inputStream.filter(m -> "key2".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream2));
  inputStream.filter(m -> "key3".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream3));
}
 
Example #13
Source File: StreamTaskIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code MyStreamTestTask} over generated multi-partition input on an
 * in-memory system, with a {@code TestContext(10)} as external context, and
 * checks that each output partition contains the expected integers in order.
 *
 * @throws Exception declared by the signature
 */
@Test
public void testSyncTaskWithMultiplePartition() throws Exception {
  Map<Integer, List<KV>> inputByPartition = new HashMap<>();
  Map<Integer, List<Integer>> expectedByPartition = new HashMap<>();
  genData(inputByPartition, expectedByPartition);

  InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  InMemoryInputDescriptor<KV> input =
      inMemorySystem.getInputDescriptor("input", new NoOpSerde<KV>());
  InMemoryOutputDescriptor<Integer> output =
      inMemorySystem.getOutputDescriptor("output", new NoOpSerde<Integer>());

  TestRunner
      .of(MyStreamTestTask.class)
      .addInputStream(input, inputByPartition)
      .addOutputStream(output, 5)
      .addExternalContext(new TestContext(10))
      .run(Duration.ofSeconds(2));

  StreamAssert.containsInOrder(expectedByPartition, output, Duration.ofMillis(1000));
}
 
Example #14
Source File: TestEventHubsInputDescriptor.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an Event Hubs input descriptor created without SAS key name, SAS
 * token or consumer group generates only three config entries: the system
 * mapping, the namespace and the entity path.
 */
@Test
public void testWithoutEntityConnectionConfigs() {
  final String systemName = "eventHub";
  final String streamId = "input-stream";

  EventHubsInputDescriptor<KV<String, String>> descriptor =
      new EventHubsSystemDescriptor(systemName)
          .getInputDescriptor(streamId, "entity-namespace", "entity3", new StringSerde());

  Map<String, String> configs = descriptor.toConfig();

  // Entries that must be present.
  assertEquals("eventHub", configs.get("streams.input-stream.samza.system"));
  assertEquals("entity-namespace",
      configs.get(String.format(EventHubConfig.CONFIG_STREAM_NAMESPACE, streamId)));
  assertEquals("entity3",
      configs.get(String.format(EventHubConfig.CONFIG_STREAM_ENTITYPATH, streamId)));

  // Connection settings that were never supplied must be absent.
  assertNull(configs.get(String.format(EventHubConfig.CONFIG_STREAM_SAS_KEY_NAME, streamId)));
  assertNull(configs.get(String.format(EventHubConfig.CONFIG_STREAM_SAS_TOKEN, streamId)));
  assertNull(configs.get(String.format(EventHubConfig.CONFIG_STREAM_CONSUMER_GROUP, streamId)));

  // Nothing besides the three entries checked above.
  assertEquals(3, configs.size());
}
 
Example #15
Source File: TransactionalStateMultiStoreIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Declares a task application with one Kafka input stream, a task factory that
 * creates {@code MyTask} instances, and two RocksDB tables, each with its own
 * changelog stream and a changelog replication factor of 1.
 *
 * @param appDescriptor descriptor on which the input, task factory and tables are declared
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  KVSerde<String, String> stringPairSerde = KVSerde.of(new StringSerde(), new StringSerde());

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(INPUT_SYSTEM);
  KafkaInputDescriptor<KV<String, String>> input = kafka.getInputDescriptor(INPUT_TOPIC, stringPairSerde);

  RocksDbTableDescriptor<String, String> firstStore =
      new RocksDbTableDescriptor<>(STORE_1_NAME, stringPairSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);

  RocksDbTableDescriptor<String, String> secondStore =
      new RocksDbTableDescriptor<>(STORE_2_NAME, stringPairSerde)
          .withChangelogStream(STORE_2_CHANGELOG)
          .withChangelogReplicationFactor(1);

  appDescriptor
      .withInputStream(input)
      .withTaskFactory((StreamTaskFactory) () -> new MyTask())
      .withTable(firstStore)
      .withTable(secondStore);
}
 
Example #16
Source File: StreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the message stream for the given input descriptor. If the descriptor's
 * system supplies a stream expander, the expander's result is returned directly.
 * Otherwise the descriptor is recorded, its serdes are resolved, an input
 * operator spec is created and stored under the stream id, and a stream backed
 * by that spec is returned.
 *
 * @param inputDescriptor descriptor of the input stream
 * @param <M> type of the messages in the stream
 * @return the message stream for {@code inputDescriptor}
 */
@Override
public <M> MessageStream<M> getInputStream(InputDescriptor<M, ?> inputDescriptor) {
  SystemDescriptor system = inputDescriptor.getSystemDescriptor();
  Optional<StreamExpander> maybeExpander = system.getExpander();
  if (maybeExpander.isPresent()) {
    // The expander takes over stream construction entirely.
    return maybeExpander.get().apply(this, inputDescriptor);
  }

  // TODO: SAMZA-1841: need to add to the broadcast streams if inputDescriptor is for a broadcast stream
  addInputDescriptor(inputDescriptor);

  String id = inputDescriptor.getStreamId();
  Serde streamSerde = inputDescriptor.getSerde();
  KV<Serde, Serde> serdePair = getOrCreateStreamSerdes(id, streamSerde);

  InputTransformer inputTransformer = inputDescriptor.getTransformer().orElse(null);
  boolean keyed = streamSerde instanceof KVSerde;

  InputOperatorSpec spec = OperatorSpecs.createInputOperatorSpec(
      id, serdePair.getKey(), serdePair.getValue(), inputTransformer, keyed,
      this.getNextOpId(OpCode.INPUT, null));
  inputOperators.put(id, spec);

  return new MessageStreamImpl(this, inputOperators.get(id));
}
 
Example #17
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Plans a partitionBy/map graph that reads from {@code input4Descriptor} and
 * checks that every intermediate stream in the resulting job graph has a
 * partition count equal to {@code IntermediateStreamManager.MAX_INFERRED_PARTITIONS}.
 */
@Test
public void testMaxPartitionLimit() {
  final int expectedPartitions = IntermediateStreamManager.MAX_INFERRED_PARTITIONS;

  ExecutionPlanner executionPlanner = new ExecutionPlanner(config, streamManager);
  StreamApplicationDescriptorImpl application = new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input = descriptor.getInputStream(input4Descriptor);
    OutputStream<KV<Object, Object>> output = descriptor.getOutputStream(output1Descriptor);
    input
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .map(pair -> pair)
        .sendTo(output);
  }, config);

  JobGraph plannedGraph = (JobGraph) executionPlanner.plan(application);

  // Every intermediate stream is expected to sit exactly at the inferred-partition limit.
  plannedGraph.getIntermediateStreams().forEach(
      stream -> assertEquals(expectedPartitions, stream.getPartitionCount()));
}
 
Example #18
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a special stream-table join graph in which a stream is joined with a
 * table and the join result is sent back to that same table. The graph exists to
 * ensure {@link ExecutionPlanner} does not loop indefinitely while traversing the
 * virtual cycle between the stream-table-join and send-to-table operator specs.
 *
 * <p>That virtual cycle is present to support computing partitions of
 * intermediate streams participating in stream-table joins; refer to SAMZA
 * SEP-16 for more details.
 *
 * @return descriptor for the self-feeding stream-table join graph
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input = descriptor.getInputStream(input1Descriptor);

    TableDescriptor mockTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table joinTable = descriptor.getTable(mockTableDescriptor);

    // Join against the table, then write the result into the very same table.
    input
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .sendTo(joinTable);
  }, config);
}
 
Example #19
Source File: StreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Internal helper for {@link MessageStreamImpl} to add an intermediate {@link MessageStream} to the graph.
 * An intermediate {@link MessageStream} is both an output and an input stream.
 *
 * @param streamId the id of the stream to be created. Must not already be registered as an input or
 *                 output stream.
 * @param serde the {@link Serde} to use for the message in the intermediate stream. Must not be null.
 * @param isBroadcast whether the stream is a broadcast stream.
 * @param <M> the type of messages in the intermediate {@link MessageStream}
 * @return  the intermediate {@link IntermediateMessageStreamImpl}
 */
@VisibleForTesting
public <M> IntermediateMessageStreamImpl<M> getIntermediateStream(String streamId, Serde<M> serde, boolean isBroadcast) {
  // Reject bad arguments before any state is modified.
  Preconditions.checkNotNull(serde, "serde must not be null for intermediate stream: " + streamId);
  Preconditions.checkState(!inputOperators.containsKey(streamId) && !outputStreams.containsKey(streamId),
      "getIntermediateStream must not be called multiple times with the same streamId: " + streamId);

  if (isBroadcast) {
    intermediateBroadcastStreamIds.add(streamId);
  }

  boolean isKeyed = serde instanceof KVSerde;
  KV<Serde, Serde> kvSerdes = getOrCreateStreamSerdes(streamId, serde);

  // The transformer, if any, comes from the default system descriptor.
  InputTransformer transformer = (InputTransformer) getDefaultSystemDescriptor()
      .flatMap(SystemDescriptor::getTransformer).orElse(null);

  InputOperatorSpec inputOperatorSpec =
      OperatorSpecs.createInputOperatorSpec(streamId, kvSerdes.getKey(), kvSerdes.getValue(),
          transformer, isKeyed, this.getNextOpId(OpCode.INPUT, null));
  // Register the stream on both sides: as an input operator and as an output stream.
  inputOperators.put(streamId, inputOperatorSpec);
  outputStreams.put(streamId, new OutputStreamImpl(streamId, kvSerdes.getKey(), kvSerdes.getValue(), isKeyed));
  return new IntermediateMessageStreamImpl<>(this, inputOperators.get(streamId), outputStreams.get(streamId));
}
 
Example #20
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an example stream-table join that is invalid due to disagreement in
 * partition count between the stream behind table t and another joined stream.
 * Table t is configured with input2 (16) as side-input stream.
 *
 * <pre>
 *                   join-table t -> output1 (8)
 *                         |
 *    input1 (64) —————————
 * </pre>
 *
 * @return descriptor for the invalid stream-table join graph
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input = descriptor.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> output = descriptor.getOutputStream(output1Descriptor);

    // Table fed by the "input2" side input.
    TableDescriptor sideInputTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()))
          .withSideInputs(Arrays.asList("input2"))
          .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table sideInputTable = descriptor.getTable(sideInputTableDescriptor);

    input
        .join(sideInputTable, mock(StreamTableJoinFunction.class))
        .sendTo(output);
  }, config);
}
 
Example #21
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an example stream-table join where table t is configured with
 * input1 (64) as a side-input stream.
 *
 * <pre>
 *                                   join-table t -> output1 (8)
 *                                        |
 *    input2 (16) -> partitionBy ("64") __|
 * </pre>
 *
 * @return descriptor for the stream-table join graph with side inputs
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinWithSideInputs() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> input = descriptor.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> output = descriptor.getOutputStream(output1Descriptor);

    // Table fed by the "input1" side input.
    TableDescriptor sideInputTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()))
          .withSideInputs(Arrays.asList("input1"))
          .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table sideInputTable = descriptor.getTable(sideInputTableDescriptor);

    input
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .join(sideInputTable, mock(StreamTableJoinFunction.class))
        .sendTo(output);
  }, config);
}
 
Example #22
Source File: ApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the key/value serde pair registered for a table, registering the pair
 * taken from {@code kvSerde} when the table has none yet.
 *
 * @param tableId id of the table
 * @param kvSerde serde supplying the key and value serdes for the table
 * @return the serde pair registered for {@code tableId}
 * @throws IllegalArgumentException if a different serde pair is already
 *         registered for {@code tableId}
 */
KV<Serde, Serde> getOrCreateTableSerdes(String tableId, KVSerde kvSerde) {
  Serde keySerde = kvSerde.getKeySerde();
  Serde valueSerde = kvSerde.getValueSerde();

  // First registration for this table: store the pair and hand it back.
  if (!tableSerdes.containsKey(tableId)) {
    tableSerdes.put(tableId, KV.of(keySerde, valueSerde));
    return tableSerdes.get(tableId);
  }

  KV<Serde, Serde> currentSerdePair = tableSerdes.get(tableId);
  if (!currentSerdePair.getKey().equals(keySerde) || !currentSerdePair.getValue().equals(valueSerde)) {
    throw new IllegalArgumentException(String.format("Serde for table %s is already defined. Cannot change it to "
        + "different serdes.", tableId));
  }
  // Return the pair registered for the table. The previous code looked the id up
  // in streamSerdes, which is a different registry from the one checked above.
  return currentSerdePair;
}
 
Example #23
Source File: AsyncStreamTaskIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code MyAsyncStreamTask} with {@code task.max.concurrency} set to 4 over
 * generated multi-partition input on an in-memory system and checks that each
 * output partition contains the expected integers in any order.
 *
 * @throws Exception declared by the signature
 */
@Test
public void testAsyncTaskWithMultiplePartitionMultithreaded() throws Exception {
  Map<Integer, List<KV>> inputPartitionData = new HashMap<>();
  Map<Integer, List<Integer>> expectedOutputPartitionData = new HashMap<>();
  genData(inputPartitionData, expectedOutputPartitionData);

  InMemorySystemDescriptor isd = new InMemorySystemDescriptor("async-test");

  InMemoryInputDescriptor<KV> imid = isd
      .getInputDescriptor("ints", new NoOpSerde<>());

  // Typed as Integer to match the expected output data instead of using a raw descriptor.
  InMemoryOutputDescriptor<Integer> imod = isd
      .getOutputDescriptor("ints-out", new NoOpSerde<Integer>());

  TestRunner
      .of(MyAsyncStreamTask.class)
      .addInputStream(imid, inputPartitionData)
      .addOutputStream(imod, 5)
      .addConfig("task.max.concurrency", "4")
      .run(Duration.ofSeconds(2));

  StreamAssert.containsInAnyOrder(expectedOutputPartitionData, imod, Duration.ofMillis(1000));
}
 
Example #24
Source File: TumblingWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Drops page views whose user id equals {@code FILTER_KEY}, groups the rest per
 * user id into three-second tumbling windows, and writes each window's user id
 * together with its message count to the output topic.
 *
 * @param appDescriptor descriptor on which the streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());

  KafkaInputDescriptor<PageView> pageViewsIn = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);
  KafkaOutputDescriptor<KV<String, Integer>> countsOut = kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

  MessageStream<PageView> views = appDescriptor.getInputStream(pageViewsIn);
  OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(countsOut);

  views
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      .window(
          Windows.keyedTumblingWindow(
              PageView::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(PageView.class)),
          "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(counts);
}
 
Example #25
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a simple graph of partitionBy and map:
 *
 * <pre>
 * input1 -> partitionBy -> map -> output1
 * </pre>
 *
 * @return descriptor for the simple graph
 */
private StreamApplicationDescriptorImpl createSimpleGraph() {
  return new StreamApplicationDescriptorImpl(descriptor -> {
    MessageStream<KV<Object, Object>> source = descriptor.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> sink = descriptor.getOutputStream(output1Descriptor);

    source
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .map(pair -> pair)
        .sendTo(sink);
  }, config);
}
 
Example #26
Source File: AppWithGlobalConfigExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Counts page view events per member id in ten-second tumbling windows, with a
 * repeating early trigger every five messages and discarding accumulation, and
 * writes the counts to "pageViewEventPerMember". Finally registers an empty map
 * of metrics reporter factories on the descriptor.
 *
 * @param appDescriptor descriptor on which the streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewsIn =
      kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countsOut =
      kafka.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  appDescriptor.getInputStream(pageViewsIn)
      .window(
          Windows.<PageViewEvent, String, Integer>keyedTumblingWindow(
                  PageViewEvent::getMemberId,
                  Duration.ofSeconds(10),
                  () -> 0,
                  (event, count) -> count + 1,
                  null,
                  null)
              .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
              .setAccumulationMode(AccumulationMode.DISCARDING),
          "window1")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(appDescriptor.getOutputStream(countsOut));

  appDescriptor.withMetricsReporterFactories(new HashMap<>());
}
 
Example #27
Source File: TestEventHubsOutputDescriptor.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an Event Hubs output descriptor configured with a SAS key name and
 * SAS key generates the system mapping, namespace, entity path, SAS key name and
 * SAS token config entries with the supplied values.
 */
@Test
public void testEntityConnectionConfigs() {
  final String systemName = "eventHub";
  final String streamId = "output-stream";

  EventHubsOutputDescriptor<KV<String, String>> descriptor =
      new EventHubsSystemDescriptor(systemName)
          .getOutputDescriptor(streamId, "entity-namespace", "entity3", new StringSerde())
          .withSasKeyName("secretkey")
          .withSasKey("sasToken-123");

  Map<String, String> configs = descriptor.toConfig();

  assertEquals("eventHub", configs.get("streams.output-stream.samza.system"));
  assertEquals("entity-namespace",
      configs.get(String.format(EventHubConfig.CONFIG_STREAM_NAMESPACE, streamId)));
  assertEquals("entity3",
      configs.get(String.format(EventHubConfig.CONFIG_STREAM_ENTITYPATH, streamId)));
  assertEquals("secretkey",
      configs.get(String.format(EventHubConfig.CONFIG_STREAM_SAS_KEY_NAME, streamId)));
  assertEquals("sasToken-123",
      configs.get(String.format(EventHubConfig.CONFIG_STREAM_SAS_TOKEN, streamId)));
}
 
Example #28
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the application descriptor for the stream-table join scenario: one input is
 * repartitioned and sent to a table, a second input is repartitioned and joined with that
 * table, and the result is joined with a third input before being sent to the output.
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
  /*
   * Example stream-table join app. Expected partition counts of the intermediate streams
   * introduced by partitionBy operations are enclosed in quotes.
   *
   *    input2 (16) -> partitionBy ("32") -> send-to-table t
   *
   *                                      join-table t —————
   *                                       |                |
   *    input1 (64) -> partitionBy ("32") _|                |
   *                                                       join -> output1 (8)
   *                                                        |
   *                                      input3 (32) ——————
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> tableJoinInput = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> tableFeedInput = appDesc.getInputStream(input2Descriptor);
    MessageStream<KV<Object, Object>> streamJoinInput = appDesc.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> joinedOutput = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor joinTableDescriptor =
        new TestLocalTableDescriptor.MockLocalTableDescriptor(
            "table-id",
            new KVSerde(new StringSerde(), new StringSerde()));
    Table joinTable = appDesc.getTable(joinTableDescriptor);

    // input2: repartition (id "p1"), then write into the table.
    tableFeedInput
        .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p1")
        .sendTo(joinTable);

    // input1: repartition (id "p2"), join with the table, then join with input3 (id "j2").
    tableJoinInput
        .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p2")
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .join(
            streamJoinInput,
            mock(JoinFunction.class),
            mock(Serde.class),
            mock(Serde.class),
            mock(Serde.class),
            Duration.ofHours(1),
            "j2")
        .sendTo(joinedOutput);
  }, config);
}
 
Example #29
Source File: TestExecutionPlanner.java (from the samza project, Apache License 2.0) — 5 votes
/**
 * Builds an application descriptor that combines tumbling windows (ids "w1", "w2") with three
 * stream-stream joins (ids "j1", "j2", "j3") over three inputs and two outputs.
 */
private StreamApplicationDescriptorImpl createStreamGraphWithJoinAndWindow() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    // input1 -> map
    MessageStream<KV<Object, Object>> mappedInput1 =
        appDesc.getInputStream(input1Descriptor)
            .map(msg -> msg);
    // input2 -> partitionBy "p1" -> filter
    MessageStream<KV<Object, Object>> repartitionedInput2 =
        appDesc.getInputStream(input2Descriptor)
            .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p1")
            .filter(msg -> true);
    // input3 -> filter -> partitionBy "p2" -> map
    MessageStream<KV<Object, Object>> repartitionedInput3 =
        appDesc.getInputStream(input3Descriptor)
            .filter(msg -> true)
            .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p2")
            .map(msg -> msg);
    OutputStream<KV<Object, Object>> firstOutput = appDesc.getOutputStream(output1Descriptor);
    OutputStream<KV<Object, Object>> secondOutput = appDesc.getOutputStream(output2Descriptor);

    // Window "w1": 8 ms keyed tumbling window on the first stream; the result is not consumed.
    mappedInput1
        .map(msg -> msg)
        .filter(msg -> true)
        .window(
            Windows.keyedTumblingWindow(
                msg -> msg,
                Duration.ofMillis(8),
                (Serde<KV<Object, Object>>) mock(Serde.class),
                (Serde<KV<Object, Object>>) mock(Serde.class)),
            "w1");

    // Window "w2": 16 ms keyed tumbling window on the second stream; the result is not consumed.
    repartitionedInput2
        .map(msg -> msg)
        .filter(msg -> true)
        .window(
            Windows.keyedTumblingWindow(
                msg -> msg,
                Duration.ofMillis(16),
                (Serde<KV<Object, Object>>) mock(Serde.class),
                (Serde<KV<Object, Object>>) mock(Serde.class)),
            "w2");

    // Join "j1": first stream with second stream, sent to output1.
    mappedInput1
        .join(repartitionedInput2, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofMillis(1600), "j1")
        .sendTo(firstOutput);
    // Joins "j2" and "j3": third stream with second stream, both sent to output2.
    repartitionedInput3
        .join(repartitionedInput2, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofMillis(100), "j2")
        .sendTo(secondOutput);
    repartitionedInput3
        .join(repartitionedInput2, mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofMillis(252), "j3")
        .sendTo(secondOutput);
  }, config);
}
 
Example #30
Source File: TestExecutionPlanner.java (from the samza project, Apache License 2.0) — 5 votes
/**
 * Builds an application descriptor with two stream-stream joins (ids "j1" and "j2") that
 * share the repartitioned second input as their common side.
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamStreamJoin() {
  /*
   * The graph looks like the following. Number of partitions in parentheses; quotes indicate
   * the expected value.
   *
   *                               input1 (64) -> map -> join -> output1 (8)
   *                                                       |
   *          input2 (16) -> partitionBy ("64") -> filter -|
   *                                                       |
   * input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> mappedInput1 =
        appDesc.getInputStream(input1Descriptor).map(msg -> msg);
    MessageStream<KV<Object, Object>> repartitionedInput2 =
        appDesc.getInputStream(input2Descriptor)
            .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p1")
            .filter(msg -> true);
    MessageStream<KV<Object, Object>> repartitionedInput3 =
        appDesc.getInputStream(input3Descriptor)
            .filter(msg -> true)
            .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p2")
            .map(msg -> msg);
    OutputStream<KV<Object, Object>> firstOutput = appDesc.getOutputStream(output1Descriptor);
    OutputStream<KV<Object, Object>> secondOutput = appDesc.getOutputStream(output2Descriptor);

    // Join "j1": first stream with second stream, sent to output1.
    mappedInput1
        .join(
            repartitionedInput2,
            mock(JoinFunction.class),
            mock(Serde.class),
            mock(Serde.class),
            mock(Serde.class),
            Duration.ofHours(2),
            "j1")
        .sendTo(firstOutput);

    // Join "j2": third stream with second stream, sent to output2.
    repartitionedInput3
        .join(
            repartitionedInput2,
            mock(JoinFunction.class),
            mock(Serde.class),
            mock(Serde.class),
            mock(Serde.class),
            Duration.ofHours(1),
            "j2")
        .sendTo(secondOutput);
  }, config);
}