org.apache.samza.serializers.KVSerde Java Examples

The following examples show how to use org.apache.samza.serializers.KVSerde. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FilterExample.java    From samza-hello-samza with Apache License 2.0 6 votes vote down vote up
/**
 * Wires a Kafka-backed pipeline that reads keyed page views and forwards every one of them
 * except those whose user id equals {@code INVALID_USER_ID}.
 *
 * @param appDescriptor descriptor on which the default system and the input/output streams are registered
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // One serde is shared by the input and the output because both carry (String, PageView) pairs.
  KVSerde<String, PageView> pageViewSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);
  KafkaInputDescriptor<KV<String, PageView>> source = kafka.getInputDescriptor(INPUT_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageView>> sink = kafka.getOutputDescriptor(OUTPUT_STREAM_ID, pageViewSerde);

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<KV<String, PageView>> allPageViews = appDescriptor.getInputStream(source);
  OutputStream<KV<String, PageView>> validPageViews = appDescriptor.getOutputStream(sink);

  allPageViews
      .filter(entry -> !INVALID_USER_ID.equals(entry.value.userId))
      .sendTo(validPageViews);
}
 
Example #2
Source File: StreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the stream described by {@code inputDescriptor} as an input of this application and
 * returns the {@link MessageStream} that reads from it.
 *
 * <p>If the descriptor's system supplies a {@link StreamExpander}, the call is delegated to the
 * expander and nothing is registered by this method itself.
 *
 * @param inputDescriptor supplies the stream id, the serde and the optional transformer of the input
 * @param <M> type of the messages in the returned stream
 * @return the stream produced by the expander, or a new {@link MessageStreamImpl} backed by the
 *         input operator registered for the stream id
 */
@Override
public <M> MessageStream<M> getInputStream(InputDescriptor<M, ?> inputDescriptor) {
  SystemDescriptor owningSystem = inputDescriptor.getSystemDescriptor();
  Optional<StreamExpander> streamExpander = owningSystem.getExpander();
  if (streamExpander.isPresent()) {
    return streamExpander.get().apply(this, inputDescriptor);
  }

  // TODO: SAMZA-1841: need to add to the broadcast streams if inputDescriptor is for a broadcast stream
  addInputDescriptor(inputDescriptor);

  String id = inputDescriptor.getStreamId();
  Serde streamSerde = inputDescriptor.getSerde();
  // The stream counts as keyed exactly when its serde is a KVSerde.
  boolean keyed = streamSerde instanceof KVSerde;
  KV<Serde, Serde> keyAndValueSerdes = getOrCreateStreamSerdes(id, streamSerde);
  InputTransformer inputTransformer = inputDescriptor.getTransformer().orElse(null);

  InputOperatorSpec spec =
      OperatorSpecs.createInputOperatorSpec(id, keyAndValueSerdes.getKey(), keyAndValueSerdes.getValue(),
          inputTransformer, keyed, this.getNextOpId(OpCode.INPUT, null));
  inputOperators.put(id, spec);
  return new MessageStreamImpl(this, inputOperators.get(id));
}
 
Example #3
Source File: StreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Internal helper for {@link MessageStreamImpl} to add an intermediate {@link MessageStream} to the graph.
 * An intermediate {@link MessageStream} is both an output and an input stream, so it is registered in
 * both {@code inputOperators} and {@code outputStreams} under the same stream id.
 *
 * @param streamId the id of the stream to be created. Must not already be registered as an input or
 *                 an output stream; otherwise the state precondition below fails.
 * @param serde the {@link Serde} to use for the message in the intermediate stream. Must not be null.
 *              The stream is treated as keyed when this is a {@link KVSerde}.
 * @param isBroadcast whether the stream is a broadcast stream.
 * @param <M> the type of messages in the intermediate {@link MessageStream}
 * @return  the intermediate {@link MessageStreamImpl}
 */
@VisibleForTesting
public <M> IntermediateMessageStreamImpl<M> getIntermediateStream(String streamId, Serde<M> serde, boolean isBroadcast) {
  // Fail fast on a missing serde and on a stream id that was already used.
  Preconditions.checkNotNull(serde, "serde must not be null for intermediate stream: " + streamId);
  Preconditions.checkState(!inputOperators.containsKey(streamId) && !outputStreams.containsKey(streamId),
      "getIntermediateStream must not be called multiple times with the same streamId: " + streamId);

  if (isBroadcast) {
    intermediateBroadcastStreamIds.add(streamId);
  }

  boolean isKeyed = serde instanceof KVSerde;
  KV<Serde, Serde> kvSerdes = getOrCreateStreamSerdes(streamId, serde);

  // The transformer comes from the default system descriptor, if one is set; otherwise it is null.
  InputTransformer transformer = (InputTransformer) getDefaultSystemDescriptor()
      .flatMap(SystemDescriptor::getTransformer).orElse(null);

  InputOperatorSpec inputOperatorSpec =
      OperatorSpecs.createInputOperatorSpec(streamId, kvSerdes.getKey(), kvSerdes.getValue(),
          transformer, isKeyed, this.getNextOpId(OpCode.INPUT, null));
  // Register both sides of the intermediate stream under the same id.
  inputOperators.put(streamId, inputOperatorSpec);
  outputStreams.put(streamId, new OutputStreamImpl(streamId, kvSerdes.getKey(), kvSerdes.getValue(), isKeyed));
  return new IntermediateMessageStreamImpl<>(this, inputOperators.get(streamId), outputStreams.get(streamId));
}
 
Example #4
Source File: RepartitionExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Repartitions page view events by member id, counts them in five-minute keyed tumbling windows
 * and emits one {@link MyStreamOutput} per window pane, keyed by the pane's member id.
 *
 * @param appDescriptor descriptor used to register the default system and the input/output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, MyStreamOutput>> perMemberOutput =
      kafka.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(MyStreamOutput.class)));

  appDescriptor.withDefaultSystem(kafka);
  MessageStream<PageViewEvent> events = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, MyStreamOutput>> sink = appDescriptor.getOutputStream(perMemberOutput);

  // Serde of the repartitioned (member id, event) stream.
  KVSerde<String, PageViewEvent> repartitionSerde =
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class));

  events
      .partitionBy(event -> event.getMemberId(), event -> event, repartitionSerde, "partitionBy")
      .window(Windows.keyedTumblingWindow(
          KV::getKey, Duration.ofMinutes(5), () -> 0, (message, count) -> count + 1, null, null), "window")
      .map(pane -> KV.of(pane.getKey().getKey(), new MyStreamOutput(pane)))
      .sendTo(sink);
}
 
Example #5
Source File: KeyValueStoreExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Repartitions page view events by member id, feeds the events through {@code MyStatsCounter}
 * and writes the resulting stats keyed by their member id.
 *
 * @param appDescriptor descriptor used to register the default system and the input/output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> statsOutput =
      kafka.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  appDescriptor.withDefaultSystem(kafka);
  MessageStream<PageViewEvent> events = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, StatsOutput>> sink = appDescriptor.getOutputStream(statsOutput);

  // Serde of the repartitioned (member id, event) stream.
  KVSerde<String, PageViewEvent> repartitionSerde =
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class));

  events
      .partitionBy(event -> event.getMemberId(), event -> event, repartitionSerde, "partitionBy")
      // Only the event is needed downstream; the partition key is dropped again.
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(memberStats -> KV.of(memberStats.memberId, memberStats))
      .sendTo(sink);
}
 
Example #6
Source File: PageViewCounterExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Counts page views per member in ten-second keyed tumbling windows. The window fires early
 * after every five messages and discards its state after each firing.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countOutput =
      kafka.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  MessageStream<PageViewEvent> events = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, PageViewCount>> sink = appDescriptor.getOutputStream(countOutput);

  // Counting fold: start at zero and add one per event.
  SupplierFunction<Integer> zero = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> increment = (event, count) -> count + 1;

  events
      .window(Windows.keyedTumblingWindow(PageViewEvent::getMemberId, Duration.ofSeconds(10), zero, increment, null, null)
          .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
          .setAccumulationMode(AccumulationMode.DISCARDING), "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(sink);
}
 
Example #7
Source File: MergeExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Merges three keyed page view input streams into a single stream and writes it to
 * {@code mergedStream}. All four streams share the same serde.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> pageViewSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<KV<String, PageViewEvent>> firstInput =
      kafka.getInputDescriptor("pageViewStream1", pageViewSerde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> secondInput =
      kafka.getInputDescriptor("pageViewStream2", pageViewSerde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> thirdInput =
      kafka.getInputDescriptor("pageViewStream3", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> mergedOutput =
      kafka.getOutputDescriptor("mergedStream", pageViewSerde);

  // Inputs are obtained in the same order in which they are merged.
  MessageStream<KV<String, PageViewEvent>> first = appDescriptor.getInputStream(firstInput);
  MessageStream<KV<String, PageViewEvent>> second = appDescriptor.getInputStream(secondInput);
  MessageStream<KV<String, PageViewEvent>> third = appDescriptor.getInputStream(thirdInput);

  MessageStream
      .mergeAll(ImmutableList.of(first, second, third))
      .sendTo(appDescriptor.getOutputStream(mergedOutput));
}
 
Example #8
Source File: BroadcastExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Fans a single keyed page view stream out to three output streams; each output receives only
 * the messages whose key equals {@code "key1"}, {@code "key2"} or {@code "key3"} respectively.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> pageViewSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<KV<String, PageViewEvent>> input =
      kafka.getInputDescriptor("pageViewEvent", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> firstOutput =
      kafka.getOutputDescriptor("outStream1", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> secondOutput =
      kafka.getOutputDescriptor("outStream2", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> thirdOutput =
      kafka.getOutputDescriptor("outStream3", pageViewSerde);

  MessageStream<KV<String, PageViewEvent>> pageViews = appDescriptor.getInputStream(input);

  // The same input stream is consumed by three independent filter branches.
  pageViews
      .filter(entry -> entry.key.equals("key1"))
      .sendTo(appDescriptor.getOutputStream(firstOutput));
  pageViews
      .filter(entry -> entry.key.equals("key2"))
      .sendTo(appDescriptor.getOutputStream(secondOutput));
  pageViews
      .filter(entry -> entry.key.equals("key3"))
      .sendTo(appDescriptor.getOutputStream(thirdOutput));
}
 
Example #9
Source File: AppWithGlobalConfigExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Counts page views per member in ten-second keyed tumbling windows (early firing every five
 * messages, discarding accumulation) and then installs an empty map of metrics reporter factories
 * on the application.
 *
 * @param appDescriptor descriptor used to obtain the streams and to set the metrics reporter factories
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, PageViewCount>> countOutput =
      kafka.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  appDescriptor.getInputStream(pageViewInput)
      // The explicit type witness is kept because the window is configured through chained setters.
      .window(Windows.<PageViewEvent, String, Integer>keyedTumblingWindow(PageViewEvent::getMemberId,
          Duration.ofSeconds(10), () -> 0, (event, count) -> count + 1, null, null)
          .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
          .setAccumulationMode(AccumulationMode.DISCARDING), "window1")
      .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
      .sendTo(appDescriptor.getOutputStream(countOutput));

  appDescriptor.withMetricsReporterFactories(new HashMap<>());
}
 
Example #10
Source File: AsyncApplicationExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Enriches ad click events asynchronously via {@code AsyncApplicationExample::enrichAdClickEvent}
 * and writes each enriched event keyed by its country.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<AdClickEvent> clickInput =
      kafka.getInputDescriptor("adClickEvent", new JsonSerdeV2<>(AdClickEvent.class));
  KafkaOutputDescriptor<KV<String, EnrichedAdClickEvent>> enrichedOutput =
      kafka.getOutputDescriptor("enrichedAdClickEvent",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(EnrichedAdClickEvent.class)));

  MessageStream<AdClickEvent> clicks = appDescriptor.getInputStream(clickInput);
  OutputStream<KV<String, EnrichedAdClickEvent>> sink = appDescriptor.getOutputStream(enrichedOutput);

  clicks
      .flatMapAsync(AsyncApplicationExample::enrichAdClickEvent)
      .map(enriched -> KV.of(enriched.getCountry(), enriched))
      .sendTo(sink);
}
 
Example #11
Source File: TransactionalStateIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a task application with one Kafka input stream, a {@code MyTask} task factory and a
 * single RocksDB table whose changelog stream is {@code changelogTopic} with replication factor 1.
 *
 * @param appDescriptor descriptor on which the input stream, task factory and table are registered
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // The same (String, String) serde serves both the input stream and the table.
  KVSerde<String, String> stringPairSerde = KVSerde.of(new StringSerde(), new StringSerde());

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(INPUT_SYSTEM);
  KafkaInputDescriptor<KV<String, String>> input = kafka.getInputDescriptor(INPUT_TOPIC, stringPairSerde);

  RocksDbTableDescriptor<String, String> store = new RocksDbTableDescriptor<>(STORE_NAME, stringPairSerde)
      .withChangelogStream(changelogTopic)
      .withChangelogReplicationFactor(1);

  appDescriptor
      .withInputStream(input)
      .withTaskFactory((StreamTaskFactory) () -> new MyTask())
      .withTable(store);
}
 
Example #12
Source File: TransactionalStateMultiStoreIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a task application with one Kafka input stream, a {@code MyTask} task factory and two
 * RocksDB tables. The first table logs changes to {@code changelogTopic}, the second to
 * {@code STORE_2_CHANGELOG}; both use replication factor 1.
 *
 * @param appDescriptor descriptor on which the input stream, task factory and tables are registered
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // The same (String, String) serde serves the input stream and both tables.
  KVSerde<String, String> stringPairSerde = KVSerde.of(new StringSerde(), new StringSerde());

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(INPUT_SYSTEM);
  KafkaInputDescriptor<KV<String, String>> input = kafka.getInputDescriptor(INPUT_TOPIC, stringPairSerde);

  RocksDbTableDescriptor<String, String> firstStore =
      new RocksDbTableDescriptor<>(STORE_1_NAME, stringPairSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);
  RocksDbTableDescriptor<String, String> secondStore =
      new RocksDbTableDescriptor<>(STORE_2_NAME, stringPairSerde)
          .withChangelogStream(STORE_2_CHANGELOG)
          .withChangelogReplicationFactor(1);

  appDescriptor
      .withInputStream(input)
      .withTaskFactory((StreamTaskFactory) () -> new MyTask())
      .withTable(firstStore)
      .withTable(secondStore);
}
 
Example #13
Source File: OrderShipmentJoinExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Joins the orders stream with the shipments stream using {@code MyJoinFunction} and a one-minute
 * TTL, then writes every fulfilled order keyed by its order id.
 *
 * @param appDescriptor descriptor used to obtain the two input streams and the output stream
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<OrderRecord> orderInput =
      kafka.getInputDescriptor("orders", new JsonSerdeV2<>(OrderRecord.class));
  KafkaInputDescriptor<ShipmentRecord> shipmentInput =
      kafka.getInputDescriptor("shipments", new JsonSerdeV2<>(ShipmentRecord.class));
  KafkaOutputDescriptor<KV<String, FulfilledOrderRecord>> fulfilledOutput =
      kafka.getOutputDescriptor("fulfilledOrders",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(FulfilledOrderRecord.class)));

  // Orders are obtained before shipments, matching the evaluation order of the chained form.
  MessageStream<OrderRecord> orders = appDescriptor.getInputStream(orderInput);
  MessageStream<ShipmentRecord> shipments = appDescriptor.getInputStream(shipmentInput);

  orders
      .join(shipments, new MyJoinFunction(),
          new StringSerde(), new JsonSerdeV2<>(OrderRecord.class), new JsonSerdeV2<>(ShipmentRecord.class),
          Duration.ofMinutes(1), "join")
      .map(fulfilled -> KV.of(fulfilled.orderId, fulfilled))
      .sendTo(appDescriptor.getOutputStream(fulfilledOutput));
}
 
Example #14
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an output stream declared with a {@link KVSerde} is registered under its stream id
 * together with its descriptor, and that it exposes the key and value serdes of that KVSerde.
 */
@Test
public void testGetOutputStreamWithKeyValueSerde() {
  String id = "test-stream-1";

  // A mocked KVSerde that hands out two distinguishable mocked component serdes.
  Serde keySerde = mock(Serde.class);
  Serde valueSerde = mock(Serde.class);
  KVSerde kvSerde = mock(KVSerde.class);
  doReturn(keySerde).when(kvSerde).getKeySerde();
  doReturn(valueSerde).when(kvSerde).getValueSerde();

  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericOutputDescriptor osd = system.getOutputDescriptor(id, kvSerde);

  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    appDesc.getOutputStream(osd);
  }, getConfig());

  OutputStreamImpl<TestMessageEnvelope> registered = streamAppDesc.getOutputStreams().get(id);
  assertEquals(id, registered.getStreamId());
  assertEquals(osd, streamAppDesc.getOutputDescriptors().get(id));
  assertEquals(keySerde, registered.getKeySerde());
  assertEquals(valueSerde, registered.getValueSerde());
}
 
Example #15
Source File: TestTableConfigGenerator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the generated serde config contains a key and a message serde for both tables, and
 * that the generated table config contains a table provider factory for both tables.
 */
@Test
public void testWithSerdes() {
  List<TableDescriptor> tables = Arrays.asList(
      new MockLocalTableDescriptor("t1", KVSerde.of(new StringSerde(), new IntegerSerde())),
      new MockLocalTableDescriptor("t2", KVSerde.of(new StringSerde(), new IntegerSerde()))
  );

  // Step 1: serde configuration only.
  Config jobConfig = new MapConfig(TableConfigGenerator.generateSerdeConfig(tables));
  JavaTableConfig serdeView = new JavaTableConfig(jobConfig);
  for (String tableId : Arrays.asList("t1", "t2")) {
    assertNotNull(serdeView.getKeySerde(tableId));
    assertNotNull(serdeView.getMsgSerde(tableId));
  }

  // Step 2: full table configuration generated on top of the serde configuration.
  MapConfig tableConfig = new MapConfig(TableConfigGenerator.generate(jobConfig, tables));
  JavaTableConfig tableView = new JavaTableConfig(tableConfig);
  for (String tableId : Arrays.asList("t1", "t2")) {
    assertNotNull(tableView.getTableProviderFactory(tableId));
  }
}
 
Example #16
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a simple graph of partitionBy and map:
 *
 * <pre>
 *   input1 -&gt; partitionBy -&gt; map -&gt; output1
 * </pre>
 *
 * @return the application descriptor describing that graph, built with the test's {@code config}
 */
private StreamApplicationDescriptorImpl createSimpleGraph() {
  StreamApplication simpleApp = appDesc -> {
    MessageStream<KV<Object, Object>> source = appDesc.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> sink = appDesc.getOutputStream(output1Descriptor);
    source
        .partitionBy(entry -> entry.key, entry -> entry.value, mock(KVSerde.class), "p1")
        .map(entry -> entry)
        .sendTo(sink);
  };
  return new StreamApplicationDescriptorImpl(simpleApp, config);
}
 
Example #17
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a graph that maps the "integers" input through {@code KVMapFunction}, counts the results
 * in a tumbling window named "w1" and sends every window output to outputSystem/outputStream.
 *
 * @param mode accumulation mode applied to the window
 * @param timeDuration duration of the tumbling window
 * @param earlyTrigger early trigger applied to the window
 * @return the application descriptor for the graph, built with the test's {@code config}
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode, Duration timeDuration,
      Trigger<IntegerEnvelope> earlyTrigger) throws IOException {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput = kafka.getInputDescriptor("integers", intPairSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersInput);

    integers
        .map(new KVMapFunction())
        // Counting fold: start at zero and add one per message.
        .window(Windows.<IntegerEnvelope, Integer>tumblingWindow(timeDuration, () -> 0, (msg, count) -> count + 1, new IntegerSerde())
            .setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) -> {
          SystemStream target = new SystemStream("outputSystem", "outputStream");
          collector.send(new OutgoingMessageEnvelope(target, pane));
        });
  }, config);
}
 
Example #18
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a graph that groups the "integers" input into keyed session windows named "w1" (keyed by
 * the message key) and sends every window output to outputSystem/outputStream.
 *
 * @param mode accumulation mode applied to the window
 * @param duration duration passed to the session window
 * @return the application descriptor for the graph, built with the test's {@code config}
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private StreamApplicationDescriptorImpl getKeyedSessionWindowStreamGraph(AccumulationMode mode, Duration duration) throws IOException {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput = kafka.getInputDescriptor("integers", intPairSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersInput);

    integers
        .window(Windows.keyedSessionWindow(KV::getKey, duration, new IntegerSerde(), intPairSerde)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) -> {
          SystemStream target = new SystemStream("outputSystem", "outputStream");
          collector.send(new OutgoingMessageEnvelope(target, pane));
        });
  }, config);
}
 
Example #19
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a graph that groups the "integers" input into non-keyed tumbling windows named "w1" and
 * sends every window output to outputSystem/outputStream.
 *
 * @param mode accumulation mode applied to the window
 * @param duration duration of the tumbling window
 * @param earlyTrigger early trigger applied to the window
 * @return the application descriptor for the graph, built with the test's {@code config}
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private StreamApplicationDescriptorImpl getTumblingWindowStreamGraph(AccumulationMode mode,
    Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput = kafka.getInputDescriptor("integers", intPairSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersInput);

    integers
        .window(Windows.tumblingWindow(duration, intPairSerde)
            .setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) -> {
          SystemStream target = new SystemStream("outputSystem", "outputStream");
          collector.send(new OutgoingMessageEnvelope(target, pane));
        });
  }, config);
}
 
Example #20
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a graph that groups the "integers" input into keyed tumbling windows named "w1" (keyed by
 * the message key) and sends every window output to outputSystem/outputStream.
 *
 * @param mode accumulation mode applied to the window
 * @param duration duration of the tumbling window
 * @param earlyTrigger early trigger applied to the window
 * @return the application descriptor for the graph, built with the test's {@code config}
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private StreamApplicationDescriptorImpl getKeyedTumblingWindowStreamGraph(AccumulationMode mode,
    Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericInputDescriptor<KV<Integer, Integer>> integersInput = kafka.getInputDescriptor("integers", intPairSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersInput);

    integers
        .window(Windows.keyedTumblingWindow(KV::getKey, duration, new IntegerSerde(), intPairSerde)
            .setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) -> {
          SystemStream target = new SystemStream("outputSystem", "outputStream");
          collector.send(new OutgoingMessageEnvelope(target, pane));
        });
  }, config);
}
 
Example #21
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the job config and a mocked context for each test: the context reports the job config,
 * a task model with partition 0 of kafka/integers, fresh metrics registries, and an in-memory
 * store registered under "jobName-jobId-window-w1".
 */
@Before
public void setup() {
  Map<String, String> jobSettings = new HashMap<>();
  jobSettings.put("job.default.system", "kafka");
  jobSettings.put("job.name", "jobName");
  jobSettings.put("job.id", "jobId");
  this.config = new MapConfig(jobSettings);

  this.context = new MockContext();
  when(this.context.getJobContext().getConfig()).thenReturn(this.config);

  // Task model: a single task named "task 1" that owns partition 0 of kafka/integers.
  TaskModel model = mock(TaskModel.class);
  when(model.getSystemStreamPartitions()).thenReturn(ImmutableSet
      .of(new SystemStreamPartition("kafka", "integers", new Partition(0))));
  when(model.getTaskName()).thenReturn(new TaskName("task 1"));
  when(this.context.getTaskContext().getTaskModel()).thenReturn(model);

  when(this.context.getTaskContext().getTaskMetricsRegistry()).thenReturn(new MetricsRegistryMap());
  when(this.context.getContainerContext().getContainerMetricsRegistry()).thenReturn(new MetricsRegistryMap());

  // Backing store for the window operator named "w1".
  Serde windowKeySerde = new TimeSeriesKeySerde(new IntegerSerde());
  Serde windowValueSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
  when(this.context.getTaskContext().getStore("jobName-jobId-window-w1"))
      .thenReturn(new TestInMemoryStore<>(windowKeySerde, windowValueSerde));
}
 
Example #22
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Example stream-table join where table t is configured with input1 (64) as a side-input stream.
 *
 * <pre>
 *                                   join-table t -&gt; output1 (8)
 *                                        |
 *    input2 (16) -&gt; partitionBy ("64") __|
 * </pre>
 *
 * @return the application descriptor describing that graph, built with the test's {@code config}
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinWithSideInputs() {
  StreamApplication joinApp = appDesc -> {
    MessageStream<KV<Object, Object>> input2 = appDesc.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> sink = appDesc.getOutputStream(output1Descriptor);

    // Table backed by the side input "input1" with a mocked side-inputs processor.
    TableDescriptor sideInputTable = new TestLocalTableDescriptor.MockLocalTableDescriptor(
      "table-id", new KVSerde(new StringSerde(), new StringSerde()))
        .withSideInputs(Arrays.asList("input1"))
        .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table joinTable = appDesc.getTable(sideInputTable);

    input2
        .partitionBy(entry -> entry.key, entry -> entry.value, mock(KVSerde.class), "p1")
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .sendTo(sink);
  };
  return new StreamApplicationDescriptorImpl(joinApp, config);
}
 
Example #23
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Example stream-table join that is invalid due to disagreement in partition count between the
 * stream behind table t and another joined stream. Table t is configured with input2 (16) as
 * side-input stream.
 *
 * <pre>
 *                   join-table t -&gt; output1 (8)
 *                         |
 *    input1 (64) —————————
 * </pre>
 *
 * @return the application descriptor describing that graph, built with the test's {@code config}
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
  StreamApplication invalidJoinApp = appDesc -> {
    MessageStream<KV<Object, Object>> input1 = appDesc.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> sink = appDesc.getOutputStream(output1Descriptor);

    // Table backed by the side input "input2" with a mocked side-inputs processor.
    TableDescriptor sideInputTable = new TestLocalTableDescriptor.MockLocalTableDescriptor(
      "table-id", new KVSerde(new StringSerde(), new StringSerde()))
        .withSideInputs(Arrays.asList("input2"))
        .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table joinTable = appDesc.getTable(sideInputTable);

    // No partitionBy here: input1 is joined with the table directly.
    input1
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .sendTo(sink);
  };
  return new StreamApplicationDescriptorImpl(invalidJoinApp, config);
}
 
Example #24
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * A special example of stream-table join where a stream is joined with a table, and the result is
 * sent to the same table. This example is necessary to ensure {@link ExecutionPlanner} does not
 * get stuck traversing the virtual cycle between stream-table-join and send-to-table operator specs
 * indefinitely.
 *
 * <p>The reason such virtual cycle is present is to support computing partitions of intermediate
 * streams participating in stream-table joins. Please, refer to SAMZA SEP-16 for more details.
 *
 * @return the application descriptor describing that graph, built with the test's {@code config}
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
  StreamApplication cyclicApp = appDesc -> {
    MessageStream<KV<Object, Object>> input1 = appDesc.getInputStream(input1Descriptor);

    TableDescriptor sharedTable = new TestLocalTableDescriptor.MockLocalTableDescriptor(
      "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table joinAndSinkTable = appDesc.getTable(sharedTable);

    // The same table is both the join side and the sink.
    input1
      .join(joinAndSinkTable, mock(StreamTableJoinFunction.class))
      .sendTo(joinAndSinkTable);
  };
  return new StreamApplicationDescriptorImpl(cyclicApp, config);
}
 
Example #25
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Plans a partitionBy graph whose only input is {@code input4Descriptor} and checks that every
 * intermediate stream gets exactly {@code IntermediateStreamManager.MAX_INFERRED_PARTITIONS}
 * partitions.
 */
@Test
public void testMaxPartitionLimit() {
  int partitionLimit = IntermediateStreamManager.MAX_INFERRED_PARTITIONS;

  ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    // NOTE(review): the local is named input1 but it reads input4Descriptor.
    MessageStream<KV<Object, Object>> input1 = appDesc.getInputStream(input4Descriptor);
    OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
    input1.partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1").map(kv -> kv).sendTo(output1);
  }, config);

  JobGraph jobGraph = (JobGraph) planner.plan(graphSpec);

  // Every intermediate stream must have exactly the maximum inferred partition count.
  // Presumably input4 has more partitions than that limit, so the inferred count is capped —
  // TODO confirm against the fixture that defines input4Descriptor.
  jobGraph.getIntermediateStreams().forEach(edge -> {
    assertEquals(partitionLimit, edge.getPartitionCount()); // expected value is MAX_INFERRED_PARTITIONS
  });
}
 
Example #26
Source File: TestJoinOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes an application that joins an input stream with itself and expects a
 * {@link SamzaException} by the time the stream operator task has been created.
 *
 * @throws Exception declared so that any failure other than the expected one fails the test
 */
@Test(expected = SamzaException.class)
public void joinWithSelfThrowsException() throws Exception {
  Map<String, String> settings = new HashMap<>();
  settings.put("job.name", "jobName");
  settings.put("job.id", "jobId");
  StreamTestUtils.addStreamConfigs(settings, "inStream", "insystem", "instream");
  Config config = new MapConfig(settings);

  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    IntegerSerde intSerde = new IntegerSerde();
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(intSerde, intSerde);
    GenericSystemDescriptor system = new GenericSystemDescriptor("insystem", "mockFactoryClassName");
    GenericInputDescriptor<KV<Integer, Integer>> input = system.getInputDescriptor("inStream", intPairSerde);

    MessageStream<KV<Integer, Integer>> self = appDesc.getInputStream(input);

    // Both sides of the join are the very same stream instance.
    self.join(self, new TestJoinFunction(), intSerde, intPairSerde, intPairSerde, JOIN_TTL, "join");
  }, config);

  createStreamOperatorTask(new SystemClock(), streamAppDesc); // should throw an exception
}
 
Example #27
Source File: TumblingWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Drops page views whose user id equals {@code FILTER_KEY}, groups the rest per user id into
 * three-second keyed tumbling windows and writes, per window pane, the user id together with the
 * number of messages collected in the pane.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);

  JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  KafkaInputDescriptor<PageView> pageViewInput = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);

  KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaOutputDescriptor<KV<String, Integer>> countOutput = kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, Integer>> sink = appDescriptor.getOutputStream(countOutput);

  pageViews
      .filter(view -> !FILTER_KEY.equals(view.getUserId()))
      // The window gets its own serde instances, separate from the input descriptor's serde.
      .window(Windows.keyedTumblingWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "tumblingWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
      .sendTo(sink);
}
 
Example #28
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the operator specs produced by {@code partitionBy} when it is given a plain mocked
 * {@link KVSerde}: the repartitioned input operator is keyed but reports null key and value serdes,
 * and the origin input's first registered operator is a {@link PartitionByOperatorSpec} that
 * carries the supplied key/value functions and writes to a stream named after its own op id.
 */
@Test
public void testPartitionByWithNoSerde() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());

  // The repartition op id doubles as the id of the intermediate stream it creates.
  String expectedOpId =
      String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

  InputOperatorSpec inputOpSpec = streamAppDesc.getInputOperators().get(expectedOpId);
  assertNotNull(inputOpSpec);
  assertNull(inputOpSpec.getKeySerde());
  assertNull(inputOpSpec.getValueSerde());
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());

  InputOperatorSpec originInputSpec = streamAppDesc.getInputOperators().get(testInputDescriptor.getStreamId());
  Object firstRegisteredSpec = originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
  assertTrue(firstRegisteredSpec instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) firstRegisteredSpec;

  // Expected value first, actual second, so that failure messages read correctly.
  assertEquals(expectedOpId, reparOpSpec.getOpId());
  assertEquals(keyFn, reparOpSpec.getKeyFunction());
  assertEquals(valueFn, reparOpSpec.getValueFunction());
  assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}
 
Example #29
Source File: ImpulseTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the transform into a Samza input stream: a generic system backed by
 * {@code SamzaImpulseSystemFactory} is created under the transform's output id, and an input
 * descriptor for that same id is registered on the translation context.
 *
 * @param transform the transform node whose output id names both the system and the stream
 * @param pipeline the pipeline being translated; not read by this method
 * @param ctx translation context that resolves the output id and receives the input descriptor
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {

  final String impulseId = ctx.getOutputId(transform);

  // The KvCoder is needed here for Samza not to crop the key.
  final Serde<KV<?, OpMessage<byte[]>>> passThroughSerde = KVSerde.of(new NoOpSerde(), new NoOpSerde<>());

  final GenericSystemDescriptor impulseSystem =
      new GenericSystemDescriptor(impulseId, SamzaImpulseSystemFactory.class.getName());
  final GenericInputDescriptor<KV<?, OpMessage<byte[]>>> impulseInput =
      impulseSystem.getInputDescriptor(impulseId, passThroughSerde);

  ctx.registerInputMessageStream(impulseId, impulseInput);
}
 
Example #30
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a graph containing a single {@code partitionBy}, clones its operator spec graph and
 * checks the clone against the original with {@code OperatorSpecTestUtils.assertClonedGraph}.
 */
@Test
public void testCopy() {
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream source = appDesc.getInputStream(testInputDescriptor);
    source.partitionBy(m -> m.toString(), m -> m, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());

  OperatorSpecGraph original = streamAppDesc.getOperatorSpecGraph();
  OperatorSpecGraph copy = original.clone();
  OperatorSpecTestUtils.assertClonedGraph(original, copy);
}