Java Code Examples for org.apache.samza.operators.MessageStream#map()

The following examples show how to use org.apache.samza.operators.MessageStream#map(). You can vote up the examples you like and vote down the ones you don't, and follow the links above each example to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: ProjectTranslator.java (from the samza project, Apache License 2.0) - 6 votes
/**
 * Translates a {@link Project} node: maps its input stream through a
 * {@code ProjectMapFunction}, optionally applies a single flatten step, and
 * registers the resulting stream and the node itself with the context.
 *
 * @param project the projection node to translate
 * @param logicalOpId logical operator id passed on to the {@code ProjectMapFunction}
 * @param context context used to look up the input stream and to register the output
 * @throws SamzaException if more than one projected expression is a flatten
 */
void translate(final Project project, final String logicalOpId, final TranslatorContext context) {
  MessageStream<SamzaSqlRelMessage> source = context.getMessageStream(project.getInput().getId());

  final int projectId = project.getId();

  MessageStream<SamzaSqlRelMessage> result =
      source.map(new ProjectMapFunction(projectId, queryId, logicalOpId));

  // Collect the positions of all projected expressions that are flatten calls.
  List<RexNode> expressions = project.getProjects();
  List<Integer> flattenIndices = IntStream.range(0, expressions.size())
      .filter(index -> this.isFlatten(expressions.get(index)))
      .boxed()
      .collect(Collectors.toList());

  // Only a single flatten per projection is supported; fail fast otherwise.
  if (flattenIndices.size() > 1) {
    String msg = "Multiple flatten operators in a single query is not supported";
    LOG.error(msg);
    throw new SamzaException(msg);
  }

  if (!flattenIndices.isEmpty()) {
    result = translateFlatten(flattenIndices.get(0), result);
  }

  context.registerMessageStream(projectId, result);
  context.registerRelNode(projectId, project);
}
 
Example 2
Source File: SamzaPublishViewTranslator.java (from the beam project, Apache License 2.0) - 5 votes
/**
 * Translates a {@code SamzaPublishView} transform: keeps only the element
 * messages of the input stream, broadcasts them unless the maximum source
 * parallelism is 1, wraps each element as a side-input message for the view,
 * and registers the resulting stream as the view stream.
 *
 * @param transform the publish-view transform being translated
 * @param node the transform hierarchy node (not used by this method)
 * @param ctx translation context providing streams, options and ids
 */
private static <ElemT, ViewT> void doTranslate(
    SamzaPublishView<ElemT, ViewT> transform,
    TransformHierarchy.Node node,
    TranslationContext ctx) {

  final PCollection<List<ElemT>> inputCollection = ctx.getInput(transform);
  final MessageStream<OpMessage<Iterable<ElemT>>> messages = ctx.getMessageStream(inputCollection);
  @SuppressWarnings("unchecked")
  final Coder<WindowedValue<Iterable<ElemT>>> coder = (Coder) SamzaCoders.of(inputCollection);

  // Drop everything that is not an element message, then unwrap the element.
  final MessageStream<OpMessage<Iterable<ElemT>>> elementMessages =
      messages.filter(msg -> msg.getType() == OpMessage.Type.ELEMENT);
  final MessageStream<WindowedValue<Iterable<ElemT>>> elements =
      elementMessages.map(OpMessage::getElement);

  // TODO: once SAMZA-1580 is resolved, this optimization will go directly inside Samza
  final MessageStream<WindowedValue<Iterable<ElemT>>> broadcastStream;
  if (ctx.getPipelineOptions().getMaxSourceParallelism() == 1) {
    // With a max source parallelism of 1 the broadcast step is skipped.
    broadcastStream = elements;
  } else {
    broadcastStream =
        elements.broadcast(SamzaCoders.toSerde(coder), "view-" + ctx.getTransformId());
  }

  final String viewId = ctx.getViewId(transform.getView());
  final MessageStream<OpMessage<Iterable<ElemT>>> sideInputStream =
      broadcastStream.map(element -> OpMessage.ofSideInput(viewId, element));

  ctx.registerViewStream(transform.getView(), sideInputStream);
}
 
Example 3
Source File: SystemConsumerWithSamzaBench.java (from the samza project, Apache License 2.0) - 5 votes
/**
 * Runs a Samza application that routes every message of the configured input
 * stream through a {@code MessageConsumer}, blocks until {@code totalEvents}
 * messages have been consumed, kills the runner, prints the measured
 * throughput and finally exits the JVM.
 *
 * <p>Both printed rates are computed from one event count and one elapsed
 * time, measured up to the instant consumption was observed to be complete
 * (before the runner is killed), so the two lines always agree.
 *
 * @throws IOException if the superclass start-up fails
 * @throws InterruptedException if the thread is interrupted while polling for completion
 */
public void start() throws IOException, InterruptedException {
  super.start();
  MessageConsumer consumeFn = new MessageConsumer();
  StreamApplication app = appDesc -> {
    String systemFactoryName = new SystemConfig(config).getSystemFactory(systemName).get();
    GenericSystemDescriptor sd = new GenericSystemDescriptor(systemName, systemFactoryName);
    GenericInputDescriptor<Object> isd = sd.getInputDescriptor(streamId, new NoOpSerde<>());
    MessageStream<Object> stream = appDesc.getInputStream(isd);
    stream.map(consumeFn);
  };
  ApplicationRunner runner = ApplicationRunners.getApplicationRunner(app, new MapConfig());

  runner.run();

  // Poll until the consumer has seen the requested number of events.
  while (consumeFn.getEventsConsumed() < totalEvents) {
    Thread.sleep(10);
  }

  Instant endTime = Instant.now();

  runner.kill();

  // Clamp to 1 ms so a very short run cannot trigger a division by zero, and
  // multiply with a long literal so the product cannot overflow int arithmetic.
  long elapsedMillis = Math.max(1L, Duration.between(consumeFn.startTime, endTime).toMillis());
  long eventsPerSecond = consumeFn.getEventsConsumed() * 1000L / elapsedMillis;

  System.out.println("\n*******************");
  System.out.println(String.format("Started at %s Ending at %s ", consumeFn.startTime, endTime));
  System.out.println(String.format("Event Rate is %s Messages/Sec ", eventsPerSecond));

  System.out.println("Event Rate is " + eventsPerSecond);
  System.out.println("*******************\n");

  System.exit(0);
}
 
Example 4
Source File: TestOperatorImplGraph.java (from the samza project, Apache License 2.0) - 5 votes
/**
 * Checks that a filter and a map attached to the same input stream are both
 * registered as downstream operators of the corresponding input operator.
 */
@Test
public void testBroadcastChain() {
  String streamId = "input";
  String systemName = "input-system";
  String physicalName = "input-stream";

  HashMap<String, String> jobConfigs = new HashMap<>();
  jobConfigs.put(JobConfig.JOB_NAME, "test-job");
  jobConfigs.put(JobConfig.JOB_ID, "1");
  StreamTestUtils.addStreamConfigs(jobConfigs, streamId, systemName, physicalName);
  Config config = new MapConfig(jobConfigs);
  when(this.context.getJobContext().getConfig()).thenReturn(config);

  // Two operators hang off the same input stream: one filter and one map.
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(systemName, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, mock(Serde.class));
    MessageStream<Object> source = appDesc.getInputStream(inputDescriptor);
    source.filter(mock(FilterFunction.class));
    source.map(mock(MapFunction.class));
  }, config);

  OperatorImplGraph opImplGraph =
      new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

  InputOperatorImpl inputOperator = opImplGraph.getInputOperator(new SystemStream(systemName, physicalName));
  assertEquals(2, inputOperator.registeredOperators.size());

  boolean hasFilter = inputOperator.registeredOperators.stream()
      .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.FILTER);
  assertTrue(hasFilter);

  boolean hasMap = inputOperator.registeredOperators.stream()
      .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.MAP);
  assertTrue(hasMap);
}
 
Example 5
Source File: TestOperatorImplGraph.java (from the samza project, Apache License 2.0) - 5 votes
/**
 * Checks that merging two branches of the same input stream yields exactly one
 * merge operator with a single downstream map operator, and that the map
 * function after the merge is initialized only once.
 */
@Test
public void testMergeChain() {
  String inputStreamId = "input";
  String inputSystem = "input-system";
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor = sd.getInputDescriptor(inputStreamId, mock(Serde.class));
    MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
    // Fork the input into a filter branch and a map branch, then merge them again.
    MessageStream<Object> stream1 = inputStream.filter(mock(FilterFunction.class));
    MessageStream<Object> stream2 = inputStream.map(mock(MapFunction.class));
    stream1.merge(Collections.singleton(stream2))
        .map(new TestMapFunction<Object, Object>("test-map-1", (Function & Serializable) m -> m));
  }, getConfig());

  TaskName mockTaskName = mock(TaskName.class);
  TaskModel taskModel = mock(TaskModel.class);
  when(taskModel.getTaskName()).thenReturn(mockTaskName);
  when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);

  OperatorImplGraph opImplGraph =
      new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

  // Gather every operator reachable from the input operators.
  Set<OperatorImpl> opSet = opImplGraph.getAllInputOperators().stream().collect(HashSet::new,
    (s, op) -> addOperatorRecursively(s, op), HashSet::addAll);
  Object[] mergeOps = opSet.stream().filter(op -> op.getOperatorSpec().getOpCode() == OpCode.MERGE).toArray();
  assertEquals(1, mergeOps.length);
  assertEquals(1, ((OperatorImpl) mergeOps[0]).registeredOperators.size());
  OperatorImpl mapOp = (OperatorImpl) ((OperatorImpl) mergeOps[0]).registeredOperators.iterator().next();
  // Expected value goes first so a failure reports "expected MAP but was ...".
  assertEquals(OpCode.MAP, mapOp.getOperatorSpec().getOpCode());

  // verify that the DAG after merge is only traversed & initialized once
  assertEquals(1, TestMapFunction.getInstanceByTaskName(mockTaskName, "test-map-1").numInitCalled);
}
 
Example 6
Source File: LogicalAggregateTranslator.java (from the samza project, Apache License 2.0) - 5 votes
/**
 * Translates a {@link LogicalAggregate} node into a keyed tumbling window that
 * counts messages per key, and registers the resulting stream under the
 * aggregate's id.
 *
 * <p>Each emitted window pane becomes a new {@code SamzaSqlRelMessage} whose
 * fields are the key's fields plus the first aggregate field name paired with
 * the pane's count.
 *
 * @param aggregate the aggregate node to translate
 * @param context context used to look up the input stream, read the window
 *     duration and register the output stream
 */
void translate(final LogicalAggregate aggregate, final TranslatorContext context) {
  validateAggregateFunctions(aggregate);

  MessageStream<SamzaSqlRelMessage> source = context.getMessageStream(aggregate.getInput().getId());

  // At this point, the assumption is that only count function is supported.
  SupplierFunction<Long> zero = () -> 0L;
  FoldLeftFunction<SamzaSqlRelMessage, Long> increment = (message, count) -> count + 1;

  final List<String> aggFieldNames = getAggFieldNames(aggregate);
  final Duration windowDuration =
      Duration.ofMillis(context.getExecutionContext().getSamzaSqlApplicationConfig().getWindowDurationMs());
  final String windowId = changeLogStorePrefix + "_tumblingWindow_" + logicalOpId;

  MessageStream<SamzaSqlRelMessage> outputStream =
      source
          .map(new TranslatorInputMetricsMapFunction(logicalOpId))
          .window(
              // The whole message is the window key.
              Windows.keyedTumblingWindow(m -> m,
                  windowDuration,
                  zero,
                  increment,
                  new SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde(),
                  new LongSerde())
                  .setAccumulationMode(AccumulationMode.DISCARDING),
              windowId)
          .map(pane -> {
            // NOTE(review): these lists come from the key's record and are appended to in place -
            // confirm the record does not share them with other consumers.
            List<String> names = pane.getKey().getKey().getSamzaSqlRelRecord().getFieldNames();
            List<Object> values = pane.getKey().getKey().getSamzaSqlRelRecord().getFieldValues();
            names.add(aggFieldNames.get(0));
            values.add(pane.getMessage());
            return new SamzaSqlRelMessage(names, values, new SamzaSqlRelMsgMetadata(0L, 0L));
          });
  context.registerMessageStream(aggregate.getId(), outputStream);
  // Attach the output-metrics function as a side branch; its result is not registered.
  outputStream.map(new TranslatorOutputMetricsMapFunction(logicalOpId));
}
 
Example 7
Source File: QueryTranslator.java (from the samza project, Apache License 2.0) - 5 votes
/**
 * Maps the stream registered for {@code node} through an
 * {@code OutputMapFunction} and sends the result to the sink: to a table when
 * the sink config carries a table descriptor, otherwise to an output stream.
 * When the sink is a stream and system-event processing is enabled, system
 * messages from every input stream are forwarded to the same output stream.
 *
 * @param queryLogicalId logical id of the query, passed to the output map function
 * @param logicalOpId logical operator id, passed to the output map function
 * @param sinkStream source name used to look up the sink configuration
 * @param appDesc application descriptor used to obtain the output stream or table
 * @param translatorContext context holding the stream registered for {@code node}
 * @param node rel node whose registered stream is sent to the sink
 * @param queryId id of the query, passed to the output map function
 * @throws SamzaException if the table for the sink's table descriptor cannot be obtained
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream,
    StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> kvStream =
      relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
  Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();

  // Table sink: write to the table and finish.
  if (tableDescriptor.isPresent()) {
    Table outputTable = appDesc.getTable(tableDescriptor.get());
    if (outputTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
    }
    kvStream.sendTo(outputTable);
    return;
  }

  // Stream sink: one system descriptor per system name and one output stream per source.
  KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  String systemName = sinkConfig.getSystemName();
  DelegatingSystemDescriptor systemDescriptor =
      systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> outputDescriptor =
      systemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
  OutputStream sink =
      outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(outputDescriptor));
  kvStream.sendTo(sink);

  // Process system events only if the output is a stream.
  if (!sqlConfig.isProcessSystemEvents()) {
    return;
  }
  for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
    MessageStream<SamzaSqlInputMessage> systemMessages =
        inputStream.filter(message -> message.getMetadata().isSystemMessage());
    MessageStream<KV<Object, Object>> systemEventStream =
        systemMessages.map(SamzaSqlInputMessage::getKeyAndMessageKV);

    systemEventStream.sendTo(sink);
  }
}
 
Example 8
Source File: TranslationContext.java (from the beam project, Apache License 2.0) - 4 votes
/**
 * Maps a stream of key/value pairs onto a stream of their values.
 *
 * @param input stream of Samza {@code KV} pairs
 * @param <T> type of the values
 * @return a stream carrying only the value of each pair
 */
private static <T> MessageStream<T> getValueStream(
    MessageStream<org.apache.samza.operators.KV<?, T>> input) {
  final MessageStream<T> values = input.map(pair -> pair.getValue());
  return values;
}
 
Example 9
Source File: JoinTranslator.java (from the samza project, Apache License 2.0) - 4 votes
/**
 * Translates a {@link LogicalJoin} between a stream and a table: validates the
 * join, splits the input references of the join condition into stream-side and
 * table-side key indices, joins the stream with the table, and registers the
 * joined stream under the join's id.
 *
 * @param join the join node to translate
 * @param translatorContext context used to resolve input types, streams and
 *     the table, and to register the output stream
 */
void translate(final LogicalJoin join, final TranslatorContext translatorContext) {
  JoinInputNode.InputType leftType = getInputType(join.getLeft(), translatorContext);
  JoinInputNode.InputType rightType = getInputType(join.getRight(), translatorContext);

  // Do the validation of join query
  validateJoinQuery(join, leftType, rightType);

  // At this point, one of the sides is a table. A right side that is not a stream is the table side.
  final boolean tableOnRight = rightType != JoinInputNode.InputType.STREAM;

  // Key ids are the positions of the join-condition fields inside the stream
  // record and the table record respectively.
  final List<Integer> streamKeyIds = new LinkedList<>();
  final List<Integer> tableKeyIds = new LinkedList<>();

  // Field indices of the join row: the left input's fields come first, so the
  // table's index range depends on which side the table is on.
  final int leftFieldCount = join.getLeft().getRowType().getFieldCount();
  final int tableStartIdx;
  final int streamStartIdx;
  final int tableEndIdx;
  if (tableOnRight) {
    tableStartIdx = leftFieldCount;
    streamStartIdx = 0;
    tableEndIdx = join.getRowType().getFieldCount();
  } else {
    tableStartIdx = 0;
    streamStartIdx = leftFieldCount;
    tableEndIdx = leftFieldCount;
  }

  // Walk the join condition and bucket every input reference by side,
  // rebasing each index to the start of its own side.
  join.getCondition().accept(new RexShuttle() {
    @Override
    public RexNode visitInputRef(RexInputRef inputRef) {
      validateJoinKeyType(inputRef); // Validate the type of the input ref.
      final int index = inputRef.getIndex();
      final boolean refersToTable = index >= tableStartIdx && index < tableEndIdx;
      if (refersToTable) {
        tableKeyIds.add(index - tableStartIdx);
      } else {
        streamKeyIds.add(index - streamStartIdx);
      }
      return inputRef;
    }
  });
  Collections.sort(tableKeyIds);
  Collections.sort(streamKeyIds);

  // Get the two input nodes (stream and table nodes) for the join.
  final JoinInputNode streamNode;
  final JoinInputNode tableNode;
  if (tableOnRight) {
    streamNode = new JoinInputNode(join.getLeft(), streamKeyIds, leftType, false);
    tableNode = new JoinInputNode(join.getRight(), tableKeyIds, rightType, true);
  } else {
    streamNode = new JoinInputNode(join.getRight(), streamKeyIds, rightType, true);
    tableNode = new JoinInputNode(join.getLeft(), tableKeyIds, leftType, false);
  }

  MessageStream<SamzaSqlRelMessage> inputStream = translatorContext.getMessageStream(streamNode.getRelNode().getId());
  Table table = getTable(tableNode, translatorContext);

  MessageStream<SamzaSqlRelMessage> joinedStream =
      joinStreamWithTable(inputStream, table, streamNode, tableNode, join, translatorContext);

  translatorContext.registerMessageStream(join.getId(), joinedStream);

  // Attach the output-metrics function as a side branch; its result is not registered.
  joinedStream.map(outputMetricsMF);
}
 
Example 10
Source File: RepartitionJoinWindowApp.java (from the samza project, Apache License 2.0) - 4 votes
/**
 * Builds the application: repartitions page views and ad clicks by view id,
 * joins them, repartitions the joined records by user id, groups them into
 * keyed session windows and sends each user's window size to the output topic.
 * The ids of the three repartitioned streams are recorded in
 * {@code intermediateStreamIds}.
 *
 * @param appDescriptor descriptor supplying the config and the input streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
  // before the application is run
  Config config = appDescriptor.getConfig();
  String pageViewTopic = config.get(INPUT_TOPIC_1_CONFIG_KEY);
  String adClickTopic = config.get(INPUT_TOPIC_2_CONFIG_KEY);
  String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> pageViewInput =
      kafka.getInputDescriptor(pageViewTopic, new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<AdClick> adClickInput =
      kafka.getInputDescriptor(adClickTopic, new JsonSerdeV2<>(AdClick.class));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewInput);
  MessageStream<AdClick> adClicks = appDescriptor.getInputStream(adClickInput);

  // Repartition both inputs by view id and drop the key again before joining.
  MessageStream<KV<String, PageView>> pageViewsByViewId = pageViews
      .partitionBy(PageView::getViewId, pv -> pv,
          new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");
  MessageStream<PageView> pageViewValues = pageViewsByViewId.map(KV::getValue);

  MessageStream<KV<String, AdClick>> adClicksByViewId = adClicks
      .partitionBy(AdClick::getViewId, ac -> ac,
          new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
  MessageStream<AdClick> adClickValues = adClicksByViewId.map(KV::getValue);

  // Join the two repartitioned streams with a one-minute duration argument.
  MessageStream<UserPageAdClick> joined = pageViewValues
      .join(adClickValues, new UserPageViewAdClicksJoiner(),
          new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class),
          Duration.ofMinutes(1), "pageViewAdClickJoin");

  MessageStream<KV<String, UserPageAdClick>> joinedByUserId = joined
      .partitionBy(UserPageAdClick::getUserId, upac -> upac,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");

  // Session-window per user, then emit (userId, number of records in the window) to the output topic.
  joinedByUserId.map(KV::getValue)
      .window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow")
      .map(pane -> KV.of(pane.getKey().getKey(), String.valueOf(pane.getMessage().size())))
      .sink((entry, collector, coordinator) -> {
        coordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        collector.send(
            new OutgoingMessageEnvelope(
                new SystemStream("kafka", outputTopic), null, entry.getKey(), entry.getValue()));
      });


  intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) joinedByUserId).getStreamId());
}