org.apache.samza.operators.MessageStream Java Examples

The following examples show how to use org.apache.samza.operators.MessageStream. They are drawn from the Apache Samza, Apache Beam, and hello-samza projects; the source file and license are noted with each example.
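Most of these examples follow the same high-level pattern: obtain a MessageStream from an input descriptor on the StreamApplicationDescriptor, chain operators such as filter, map, window, or join, and send the result to an OutputStream. A minimal sketch of that pattern is shown below; the system, stream, and serde choices are illustrative rather than taken from any one example, and imports are omitted as in the snippets that follow.

public class WordFilterApp implements StreamApplication {
  @Override
  public void describe(StreamApplicationDescriptor appDescriptor) {
    // Illustrative system and stream names; adjust to your deployment.
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("kafka");
    KafkaInputDescriptor<String> input = kafka.getInputDescriptor("words", new StringSerde());
    KafkaOutputDescriptor<String> output = kafka.getOutputDescriptor("long-words", new StringSerde());

    MessageStream<String> words = appDescriptor.getInputStream(input);
    OutputStream<String> longWords = appDescriptor.getOutputStream(output);

    // Keep only words longer than three characters and forward them to the output stream.
    words
        .filter(word -> word.length() > 3)
        .sendTo(longWords);
  }
}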
Example #1
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamStreamJoin() {
  /**
   * Creates the following stream-stream join which is invalid due to partition count disagreement
   * between the 2 input streams.
   *
   *   input1 (64) --
   *                 |
   *                join -> output1 (8)
   *                 |
   *   input3 (32) --
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> messageStream3 = appDesc.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);

    messageStream1
        .join(messageStream3,
            mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1")
        .sendTo(output1);
  }, config);
}
 
Example #2
Source File: TestWindowOperator.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode, Duration timeDuration,
      Trigger<IntegerEnvelope> earlyTrigger) throws IOException {
  StreamApplication userApp = appDesc -> {
    KVSerde<Integer, Integer> kvSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor sd = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("integers", kvSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(inputDescriptor);

    integers
        .map(new KVMapFunction())
        .window(Windows.<IntegerEnvelope, Integer>tumblingWindow(timeDuration, () -> 0, (m, c) -> c + 1, new IntegerSerde())
            .setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((message, messageCollector, taskCoordinator) -> {
          SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream");
          messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message));
        });
  };

  return new StreamApplicationDescriptorImpl(userApp, config);
}
 
Example #3
Source File: TumblingWindowApp.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
  KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
  OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);

  pageViews
      .filter(m -> !FILTER_KEY.equals(m.getUserId()))
      .window(Windows.keyedTumblingWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "tumblingWindow")
      .map(m -> KV.of(m.getKey().getKey(), m.getMessage().size()))
      .sendTo(outputStream);

}
 
Example #4
Source File: SessionWindowApp.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
  KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
  OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);

  pageViews
      .filter(m -> !FILTER_KEY.equals(m.getUserId()))
      .window(Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "sessionWindow")
      .map(m -> KV.of(m.getKey().getKey(), m.getMessage().size()))
      .sendTo(outputStream);
}
 
Example #5
Source File: AsyncApplicationExample.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<AdClickEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("adClickEvent", new JsonSerdeV2<>(AdClickEvent.class));

  KafkaOutputDescriptor<KV<String, EnrichedAdClickEvent>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("enrichedAdClickEvent",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(EnrichedAdClickEvent.class)));

  MessageStream<AdClickEvent> adClickEventStream = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, EnrichedAdClickEvent>> enrichedAdClickStream =
      appDescriptor.getOutputStream(outputStreamDescriptor);

  adClickEventStream
      .flatMapAsync(AsyncApplicationExample::enrichAdClickEvent)
      .map(enrichedAdClickEvent -> KV.of(enrichedAdClickEvent.getCountry(), enrichedAdClickEvent))
      .sendTo(enrichedAdClickStream);
}
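The enrichAdClickEvent method referenced via flatMapAsync is defined elsewhere in AsyncApplicationExample and is not shown above. Because flatMapAsync takes an AsyncFlatMapFunction, the helper must return a CompletionStage of a collection of results. A hypothetical sketch, assuming the enrichment simply wraps the event (the EnrichedAdClickEvent constructor and the enrichment logic are assumptions, not the project's actual code):

// Hypothetical helper; a real implementation would typically call an external service asynchronously.
private static CompletionStage<Collection<EnrichedAdClickEvent>> enrichAdClickEvent(AdClickEvent adClickEvent) {
  return CompletableFuture.supplyAsync(() ->
      Collections.singletonList(new EnrichedAdClickEvent(adClickEvent)));
}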
 
Example #6
Source File: BroadcastExample.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> serde = KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
  KafkaInputDescriptor<KV<String, PageViewEvent>> pageViewEvent =
      trackingSystem.getInputDescriptor("pageViewEvent", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream1 =
      trackingSystem.getOutputDescriptor("outStream1", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream2 =
      trackingSystem.getOutputDescriptor("outStream2", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream3 =
      trackingSystem.getOutputDescriptor("outStream3", serde);

  MessageStream<KV<String, PageViewEvent>> inputStream = appDescriptor.getInputStream(pageViewEvent);
  inputStream.filter(m -> m.key.equals("key1")).sendTo(appDescriptor.getOutputStream(outStream1));
  inputStream.filter(m -> m.key.equals("key2")).sendTo(appDescriptor.getOutputStream(outStream2));
  inputStream.filter(m -> m.key.equals("key3")).sendTo(appDescriptor.getOutputStream(outStream3));
}
 
Example #7
Source File: WindowExample.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<Integer> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde());

  SupplierFunction<Integer> initialValue = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> counter = (m, c) -> c == null ? 1 : c + 1;
  MessageStream<PageViewEvent> inputStream = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<Integer> outputStream = appDescriptor.getOutputStream(outputStreamDescriptor);

  // create a tumbling window that outputs the number of messages collected every 10 minutes.
  // also emit early results if either the number of messages collected reaches 30000, or if no new messages arrive
  // for 1 minute.
  inputStream
      .window(Windows.tumblingWindow(Duration.ofMinutes(10), initialValue, counter, new IntegerSerde())
          .setEarlyTrigger(Triggers.any(Triggers.count(30000),
              Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))), "window")
      .map(WindowPane::getMessage)
      .sendTo(outputStream);
}
 
Example #8
Source File: TestSchedulingApp.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  final JsonSerdeV2<PageView> serde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("kafka");
  KafkaInputDescriptor<PageView> isd = ksd.getInputDescriptor(PAGE_VIEWS, serde);
  final MessageStream<PageView> pageViews = appDescriptor.getInputStream(isd);
  final MessageStream<PageView> output = pageViews.flatMap(new FlatmapScheduledFn());

  MessageStreamAssert.that("Output from scheduling function should container all complete messages", output, serde)
      .containsInAnyOrder(
          Arrays.asList(
              new PageView("v1-complete", "p1", "u1"),
              new PageView("v2-complete", "p2", "u1"),
              new PageView("v3-complete", "p1", "u2"),
              new PageView("v4-complete", "p3", "u2")
          ));
}
 
Example #9
Source File: WindowAssignTranslator.java    From beam with Apache License 2.0
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final RunnerApi.WindowIntoPayload payload;
  try {
    payload =
        RunnerApi.WindowIntoPayload.parseFrom(transform.getTransform().getSpec().getPayload());
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException(
        String.format("failed to parse WindowIntoPayload: %s", transform.getId()), e);
  }

  @SuppressWarnings("unchecked")
  final WindowFn<T, ?> windowFn =
      (WindowFn<T, ?>) WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn());

  final MessageStream<OpMessage<T>> inputStream = ctx.getOneInputMessageStream(transform);

  final MessageStream<OpMessage<T>> outputStream =
      inputStream.flatMap(OpAdapter.adapt(new WindowAssignOp<>(windowFn)));

  ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
}
 
Example #10
Source File: FlattenPCollectionsTranslator.java    From beam with Apache License 2.0
private static <T> MessageStream<OpMessage<T>> mergeInputStreams(
    List<MessageStream<OpMessage<T>>> inputStreams) {
  if (inputStreams.size() == 1) {
    return Iterables.getOnlyElement(inputStreams);
  }
  final Set<MessageStream<OpMessage<T>>> streamsToMerge = new HashSet<>();
  inputStreams.forEach(
      stream -> {
        if (!streamsToMerge.add(stream)) {
          // The same stream appears more than once in the input list; add an identity-mapped copy
          // so that it can still be merged with itself.
          streamsToMerge.add(stream.map(m -> m));
        }
      });

  return MessageStream.mergeAll(streamsToMerge);
}
 
Example #11
Source File: MergeExample.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> serde = KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<KV<String, PageViewEvent>> isd1 =
      trackingSystem.getInputDescriptor("pageViewStream1", serde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> isd2 =
      trackingSystem.getInputDescriptor("pageViewStream2", serde);
  KafkaInputDescriptor<KV<String, PageViewEvent>> isd3 =
      trackingSystem.getInputDescriptor("pageViewStream3", serde);

  KafkaOutputDescriptor<KV<String, PageViewEvent>> osd =
      trackingSystem.getOutputDescriptor("mergedStream", serde);

  MessageStream
      .mergeAll(ImmutableList.of(appDescriptor.getInputStream(isd1), appDescriptor.getInputStream(isd2), appDescriptor.getInputStream(isd3)))
      .sendTo(appDescriptor.getOutputStream(osd));
}
 
Example #12
Source File: KeyValueStoreExample.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));

  KafkaOutputDescriptor<KV<String, StatsOutput>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  appDescriptor.withDefaultSystem(trackingSystem);
  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, StatsOutput>> pageViewEventPerMember = appDescriptor.getOutputStream(outputStreamDescriptor);

  pageViewEvents
      .partitionBy(pve -> pve.getMemberId(), pve -> pve,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class)), "partitionBy")
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(stats -> KV.of(stats.memberId, stats))
      .sendTo(pageViewEventPerMember);
}
 
Example #13
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0
@Test
public void testPartitionByWithNoSerde() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());
  InputOperatorSpec inputOpSpec = streamAppDesc.getInputOperators().get(
      String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName));
  assertNotNull(inputOpSpec);
  assertNull(inputOpSpec.getKeySerde());
  assertNull(inputOpSpec.getValueSerde());
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());
  InputOperatorSpec originInputSpec = streamAppDesc.getInputOperators().get(testInputDescriptor.getStreamId());
  assertTrue(originInputSpec.getRegisteredOperatorSpecs().toArray()[0] instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec  = (PartitionByOperatorSpec) originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
  assertEquals(reparOpSpec.getOpId(), String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName));
  assertEquals(reparOpSpec.getKeyFunction(), keyFn);
  assertEquals(reparOpSpec.getValueFunction(), valueFn);
  assertEquals(reparOpSpec.getOutputStream().getStreamId(), reparOpSpec.getOpId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}
 
Example #14
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl createSimpleGraph() {
  /**
   * a simple graph of partitionBy and map
   *
   * input1 -> partitionBy -> map -> output1
   *
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> input1 = appDesc.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
    input1
        .partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1")
        .map(kv -> kv)
        .sendTo(output1);
  }, config);
}
 
Example #15
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
  /**
   * A special example of a stream-table join where a stream is joined with a table, and the result is
   * sent back to the same table. This example exists to ensure {@link ExecutionPlanner} does not
   * get stuck traversing the virtual cycle between the stream-table-join and send-to-table operator
   * specs indefinitely.
   *
   * The reason such a virtual cycle is present is to support computing the partition counts of
   * intermediate streams participating in stream-table joins. Please refer to Samza SEP-16 for more details.
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);

    TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
      "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table table = appDesc.getTable(tableDescriptor);

    messageStream1
      .join(table, mock(StreamTableJoinFunction.class))
      .sendTo(table);

  }, config);
}
 
Example #16
Source File: StreamApplicationDescriptorImpl.java    From samza with Apache License 2.0
@Override
public <M> MessageStream<M> getInputStream(InputDescriptor<M, ?> inputDescriptor) {
  SystemDescriptor systemDescriptor = inputDescriptor.getSystemDescriptor();
  Optional<StreamExpander> expander = systemDescriptor.getExpander();
  if (expander.isPresent()) {
    return expander.get().apply(this, inputDescriptor);
  }

  // TODO: SAMZA-1841: need to add to the broadcast streams if inputDescriptor is for a broadcast stream
  addInputDescriptor(inputDescriptor);

  String streamId = inputDescriptor.getStreamId();
  Serde serde = inputDescriptor.getSerde();
  KV<Serde, Serde> kvSerdes = getOrCreateStreamSerdes(streamId, serde);
  boolean isKeyed = serde instanceof KVSerde;
  InputTransformer transformer = inputDescriptor.getTransformer().orElse(null);
  InputOperatorSpec inputOperatorSpec =
      OperatorSpecs.createInputOperatorSpec(streamId, kvSerdes.getKey(), kvSerdes.getValue(),
          transformer, isKeyed, this.getNextOpId(OpCode.INPUT, null));
  inputOperators.put(streamId, inputOperatorSpec);
  return new MessageStreamImpl(this, inputOperators.get(streamId));
}
 
Example #17
Source File: FilterExample.java    From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KVSerde<String, PageView> serde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<KV<String, PageView>> inputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_STREAM_ID, serde);
  KafkaOutputDescriptor<KV<String, PageView>> outputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM_ID, serde);

  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  MessageStream<KV<String, PageView>> pageViews = appDescriptor.getInputStream(inputDescriptor);
  OutputStream<KV<String, PageView>> filteredPageViews = appDescriptor.getOutputStream(outputDescriptor);

  pageViews
      .filter(kv -> !INVALID_USER_ID.equals(kv.value.userId))
      .sendTo(filteredPageViews);
}
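The constants referenced above (KAFKA_SYSTEM_NAME, KAFKA_CONSUMER_ZK_CONNECT, and so on) are declared at the top of FilterExample and are not part of this snippet. For a single-node local setup they might look like the following sketch; the concrete values here are assumptions, not the project's actual constants:

// Assumed values for a local deployment; the actual constants in FilterExample may differ.
private static final String KAFKA_SYSTEM_NAME = "kafka";
private static final List<String> KAFKA_CONSUMER_ZK_CONNECT = ImmutableList.of("localhost:2181");
private static final List<String> KAFKA_PRODUCER_BOOTSTRAP_SERVERS = ImmutableList.of("localhost:9092");
private static final Map<String, String> KAFKA_DEFAULT_STREAM_CONFIGS = ImmutableMap.of("replication.factor", "1");
private static final String INPUT_STREAM_ID = "pageview-filter-input";
private static final String OUTPUT_STREAM_ID = "pageview-filter-output";
private static final String INVALID_USER_ID = "invalid-user-id";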
 
Example #18
Source File: TranslationContext.java    From beam with Apache License 2.0
public <OutT> void registerInputMessageStream(
    PValue pvalue,
    InputDescriptor<org.apache.samza.operators.KV<?, OpMessage<OutT>>, ?> inputDescriptor) {
  // we want to register it with the Samza graph only once per i/o stream
  final String streamId = inputDescriptor.getStreamId();
  if (registeredInputStreams.containsKey(streamId)) {
    MessageStream<OpMessage<OutT>> messageStream = registeredInputStreams.get(streamId);
    LOG.info(
        String.format(
            "Stream id %s has already been mapped to %s stream. Mapping %s to the same message stream.",
            streamId, messageStream, pvalue));
    registerMessageStream(pvalue, messageStream);

    return;
  }
  @SuppressWarnings("unchecked")
  final MessageStream<OpMessage<OutT>> typedStream =
      getValueStream(appDescriptor.getInputStream(inputDescriptor));

  registerMessageStream(pvalue, typedStream);
  registeredInputStreams.put(streamId, typedStream);
}
 
Example #19
Source File: ProjectTranslator.java    From samza with Apache License 2.0
void translate(final Project project, final String logicalOpId, final TranslatorContext context) {
  MessageStream<SamzaSqlRelMessage> messageStream = context.getMessageStream(project.getInput().getId());

  final int projectId = project.getId();

  MessageStream<SamzaSqlRelMessage> outputStream =
      messageStream.map(new ProjectMapFunction(projectId, queryId, logicalOpId));

  List<RexNode> projects = project.getProjects();
  List<Integer> flattenProjects = IntStream.range(0, projects.size())
      .filter(i -> this.isFlatten(projects.get(i)))
      .boxed()
      .collect(Collectors.toList());

  if (flattenProjects.size() > 0) {
    if (flattenProjects.size() > 1) {
      String msg = "Multiple flatten operators in a single query is not supported";
      LOG.error(msg);
      throw new SamzaException(msg);
    }
    outputStream = translateFlatten(flattenProjects.get(0), outputStream);
  }

  context.registerMessageStream(project.getId(), outputStream);
  context.registerRelNode(project.getId(), project);
}
 
Example #20
Source File: BroadcastAssertApp.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Config config = appDescriptor.getConfig();
  String inputTopic = config.get(INPUT_TOPIC_NAME_PROP);

  final JsonSerdeV2<PageView> serde = new JsonSerdeV2<>(PageView.class);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> isd = ksd.getInputDescriptor(inputTopic, serde);
  final MessageStream<PageView> broadcastPageViews = appDescriptor
      .getInputStream(isd)
      .broadcast(serde, "pv");

  /**
   * Each task will see all the pageview events
   */
  MessageStreamAssert.that("Each task contains all broadcast PageView events", broadcastPageViews, serde)
      .forEachTask()
      .containsInAnyOrder(
          Arrays.asList(
              new PageView("v1", "p1", "u1"),
              new PageView("v2", "p2", "u1"),
              new PageView("v3", "p1", "u2"),
              new PageView("v4", "p3", "u2")
          ));
}
 
Example #21
Source File: PageViewCounterExample.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor =
      trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));

  KafkaOutputDescriptor<KV<String, PageViewCount>> outputStreamDescriptor =
      trackingSystem.getOutputDescriptor("pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

  MessageStream<PageViewEvent> pageViewEvents = appDescriptor.getInputStream(inputStreamDescriptor);
  OutputStream<KV<String, PageViewCount>> pageViewEventPerMemberStream = appDescriptor.getOutputStream(outputStreamDescriptor);

  SupplierFunction<Integer> initialValue = () -> 0;
  FoldLeftFunction<PageViewEvent, Integer> foldLeftFn = (m, c) -> c + 1;
  pageViewEvents
      .window(Windows.keyedTumblingWindow(PageViewEvent::getMemberId, Duration.ofSeconds(10), initialValue, foldLeftFn, null, null)
          .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
          .setAccumulationMode(AccumulationMode.DISCARDING), "tumblingWindow")
      .map(windowPane -> KV.of(windowPane.getKey().getKey(), buildPageViewCount(windowPane)))
      .sendTo(pageViewEventPerMemberStream);
}
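buildPageViewCount is a private helper in PageViewCounterExample that is not shown above; it turns each emitted WindowPane (keyed by member id and carrying the running count) into the PageViewCount output type. A rough sketch under that assumption (the PageViewCount constructor and its fields are assumptions):

// Hypothetical helper; the PageViewCount constructor and fields are assumptions.
private static PageViewCount buildPageViewCount(WindowPane<String, Integer> windowPane) {
  String memberId = windowPane.getKey().getKey();
  int viewCount = windowPane.getMessage();
  return new PageViewCount(memberId, viewCount);
}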
 
Example #22
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
  /**
   * Example stream-table join app. Expected partition counts of intermediate streams introduced
   * by partitionBy operations are enclosed in quotes.
   *
   *    input2 (16) -> partitionBy ("32") -> send-to-table t
   *
   *                                      join-table t —————
   *                                       |                |
   *    input1 (64) -> partitionBy ("32") _|                |
   *                                                       join -> output1 (8)
   *                                                        |
   *                                      input3 (32) ——————
   *
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> messageStream2 = appDesc.getInputStream(input2Descriptor);
    MessageStream<KV<Object, Object>> messageStream3 = appDesc.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table table = appDesc.getTable(tableDescriptor);

    messageStream2
        .partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1")
        .sendTo(table);

    messageStream1
        .partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p2")
        .join(table, mock(StreamTableJoinFunction.class))
        .join(messageStream3,
              mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2")
        .sendTo(output1);
  }, config);
}
 
Example #23
Source File: TestExecutionPlanner.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoin() {
  /**
   * Example stream-table join that is invalid due to disagreement in partition count
   * between the 2 input streams.
   *
   *    input1 (64) -> send-to-table t
   *
   *                   join-table t -> output1 (8)
   *                         |
   *    input2 (16) —————————
   *
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> messageStream2 = appDesc.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);

    TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
      "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table table = appDesc.getTable(tableDescriptor);

    messageStream1.sendTo(table);

    messageStream1
        .join(table, mock(StreamTableJoinFunction.class))
        .join(messageStream2,
            mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2")
        .sendTo(output1);
  }, config);
}
 
Example #24
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, PageView>> isd =
      ksd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
  MessageStream<KV<String, TestData.PageView>> inputStream = appDescriptor.getInputStream(isd);
  inputStream
      .map(KV::getValue)
      .partitionBy(PageView::getMemberId, pv -> pv, KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(PageView.class)), "p1")
      .sink((m, collector, coordinator) ->
          collector.send(new OutgoingMessageEnvelope(new SystemStream("test", "Output"), m.getKey(), m.getKey(), m)));
}
 
Example #25
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void testScheduledFunctionAsKeyFn() {
  ScheduledMapFn keyFn = new ScheduledMapFn();
  new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<Object> inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, m -> m, mock(KVSerde.class), "parByKey");
  }, getConfig());
}
 
Example #26
Source File: TestOperatorImplGraph.java    From samza with Apache License 2.0
@Test
public void testBroadcastChain() {
  String inputStreamId = "input";
  String inputSystem = "input-system";
  String inputPhysicalName = "input-stream";
  HashMap<String, String> configMap = new HashMap<>();
  configMap.put(JobConfig.JOB_NAME, "test-job");
  configMap.put(JobConfig.JOB_ID, "1");
  StreamTestUtils.addStreamConfigs(configMap, inputStreamId, inputSystem, inputPhysicalName);
  Config config = new MapConfig(configMap);
  when(this.context.getJobContext().getConfig()).thenReturn(config);
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor = sd.getInputDescriptor(inputStreamId, mock(Serde.class));
    MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
    inputStream.filter(mock(FilterFunction.class));
    inputStream.map(mock(MapFunction.class));
  }, config);

  OperatorImplGraph opImplGraph =
      new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

  InputOperatorImpl inputOpImpl = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));
  assertEquals(2, inputOpImpl.registeredOperators.size());
  assertTrue(inputOpImpl.registeredOperators.stream()
      .anyMatch(opImpl -> ((OperatorImpl) opImpl).getOperatorSpec().getOpCode() == OpCode.FILTER));
  assertTrue(inputOpImpl.registeredOperators.stream()
      .anyMatch(opImpl -> ((OperatorImpl) opImpl).getOperatorSpec().getOpCode() == OpCode.MAP));
}
 
Example #27
Source File: AzureBlobApplication.java    From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Define a system descriptor for Kafka
  KafkaSystemDescriptor kafkaSystemDescriptor =
      new KafkaSystemDescriptor("kafka").withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<PageView> pageViewInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(INPUT_PAGEVIEW_STREAM_ID, new JsonSerdeV2<>(PageView.class));

  // Define a system descriptor for Azure Blob Storage
  GenericSystemDescriptor azureBlobSystemDescriptor =
      new GenericSystemDescriptor(OUTPUT_SYSTEM, "org.apache.samza.system.azureblob.AzureBlobSystemFactory");

  GenericOutputDescriptor<PageViewAvroRecord> azureBlobOuputDescriptor =
      azureBlobSystemDescriptor.getOutputDescriptor(OUTPUT_STREAM, new NoOpSerde<>());

  // Set Kafka as the default system for the job
  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  // Define the input and output streams with descriptors
  MessageStream<PageView> pageViewInput = appDescriptor.getInputStream(pageViewInputDescriptor);
  OutputStream<PageViewAvroRecord> pageViewAvroRecordOutputStream = appDescriptor.getOutputStream(azureBlobOuputDescriptor);

  // Define the execution flow with the high-level API
  pageViewInput
      .map((message) -> {
        LOG.info("Sending: Received PageViewEvent with pageId: " + message.pageId);
        return PageViewAvroRecord.buildPageViewRecord(message);
      })
      .sendTo(pageViewAvroRecordOutputStream);
}
 
Example #28
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0
@Test
public void testPartitionBy() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  KVSerde<Object, Object> partitionBySerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, partitionBySerde, testRepartitionedStreamName);
  }, getConfig());
  assertEquals(2, streamAppDesc.getInputOperators().size());
  Map<String, InputOperatorSpec> inputOpSpecs = streamAppDesc.getInputOperators();
  assertTrue(inputOpSpecs.keySet().contains(String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName)));
  InputOperatorSpec inputOpSpec = inputOpSpecs.get(String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName));
  assertEquals(String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName), inputOpSpec.getStreamId());
  assertTrue(inputOpSpec.getKeySerde() instanceof NoOpSerde);
  assertTrue(inputOpSpec.getValueSerde() instanceof NoOpSerde);
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());
  InputOperatorSpec originInputSpec = inputOpSpecs.get(testInputDescriptor.getStreamId());
  assertTrue(originInputSpec.getRegisteredOperatorSpecs().toArray()[0] instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec  = (PartitionByOperatorSpec) originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
  assertEquals(reparOpSpec.getOpId(), String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName));
  assertEquals(reparOpSpec.getKeyFunction(), keyFn);
  assertEquals(reparOpSpec.getValueFunction(), valueFn);
  assertEquals(reparOpSpec.getOutputStream().getStreamId(), reparOpSpec.getOpId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}
 
Example #29
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0
@Test
public void testCopy() {
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(m -> m.toString(), m -> m, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());
  OperatorSpecGraph specGraph = streamAppDesc.getOperatorSpecGraph();
  OperatorSpecGraph clonedGraph = specGraph.clone();
  OperatorSpecTestUtils.assertClonedGraph(specGraph, clonedGraph);
}
 
Example #30
Source File: StreamTableJoinExample.java    From samza-hello-samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> joinResultSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafkaSystemDescriptor = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profileInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewInputDescriptor =
      kafkaSystemDescriptor.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> joinResultOutputDescriptor =
      kafkaSystemDescriptor.getOutputDescriptor(OUTPUT_TOPIC, joinResultSerde);

  RocksDbTableDescriptor<String, Profile> profileTableDescriptor =
      new RocksDbTableDescriptor<String, Profile>("profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafkaSystemDescriptor);

  MessageStream<Profile> profileStream = appDescriptor.getInputStream(profileInputDescriptor);
  MessageStream<PageView> pageViewStream = appDescriptor.getInputStream(pageViewInputDescriptor);
  OutputStream<EnrichedPageView> joinResultStream = appDescriptor.getOutputStream(joinResultOutputDescriptor);
  Table<KV<String, Profile>> profileTable = appDescriptor.getTable(profileTableDescriptor);

  profileStream
      .map(profile -> KV.of(profile.userId, profile))
      .sendTo(profileTable);

  pageViewStream
      .partitionBy(pv -> pv.userId, pv -> pv, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profileTable, new JoinFn())
      .sendTo(joinResultStream);
}
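The JoinFn passed to join above is a StreamTableJoinFunction that looks up each repartitioned page view against the profile table and combines the two records into an EnrichedPageView. A sketch of what such a function can look like; the EnrichedPageView constructor and the exact fields combined are assumptions:

// Hypothetical join function; the fields copied into EnrichedPageView are assumptions.
private static class JoinFn
    implements StreamTableJoinFunction<String, KV<String, PageView>, KV<String, Profile>, EnrichedPageView> {
  @Override
  public EnrichedPageView apply(KV<String, PageView> message, KV<String, Profile> record) {
    // No matching profile in the table: drop the page view by returning null.
    return record == null ? null
        : new EnrichedPageView(message.getKey(), record.getValue(), message.getValue());
  }

  @Override
  public String getMessageKey(KV<String, PageView> message) {
    return message.getKey();
  }

  @Override
  public String getRecordKey(KV<String, Profile> record) {
    return record.getKey();
  }
}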