Java Code Examples for org.apache.samza.operators.MessageStream#partitionBy()

The following examples show how to use org.apache.samza.operators.MessageStream#partitionBy(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the operator specs created by {@code partitionBy} when the supplied
 * {@code KVSerde} is a Mockito mock: the input operator spec registered for the
 * repartitioned stream must be keyed and report {@code null} key/value serdes,
 * and the original input must have a {@code PartitionByOperatorSpec} registered
 * that carries the given key and value functions.
 */
@Test
public void testPartitionByWithNoSerde() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());

  // The same id is expected for the repartitioned input operator and for the partition-by operator itself.
  String expectedId = String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

  InputOperatorSpec inputOpSpec = streamAppDesc.getInputOperators().get(expectedId);
  assertNotNull(inputOpSpec);
  assertNull(inputOpSpec.getKeySerde());
  assertNull(inputOpSpec.getValueSerde());
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());

  InputOperatorSpec originInputSpec = streamAppDesc.getInputOperators().get(testInputDescriptor.getStreamId());
  // Fetch the first registered spec once, then type-check and cast the same object.
  Object firstRegisteredSpec = originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
  assertTrue(firstRegisteredSpec instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) firstRegisteredSpec;

  // JUnit's assertEquals takes (expected, actual); keep that order so failure messages read correctly.
  assertEquals(expectedId, reparOpSpec.getOpId());
  assertEquals(keyFn, reparOpSpec.getKeyFunction());
  assertEquals(valueFn, reparOpSpec.getValueFunction());
  assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}
 
Example 2
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the operator specs created by {@code partitionBy} when a
 * {@code KVSerde} built from two {@code NoOpSerde} instances is supplied: two
 * input operators must exist, the one for the repartitioned stream must be keyed
 * and expose {@code NoOpSerde} key/value serdes, and the original input must have
 * a {@code PartitionByOperatorSpec} registered with the given key and value functions.
 */
@Test
public void testPartitionBy() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  KVSerde<Object, Object> partitionBySerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, partitionBySerde, testRepartitionedStreamName);
  }, getConfig());

  // The same id is expected for the repartitioned input operator, its stream, and the partition-by operator.
  String expectedId = String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

  Map<String, InputOperatorSpec> inputOpSpecs = streamAppDesc.getInputOperators();
  assertEquals(2, inputOpSpecs.size());
  assertTrue(inputOpSpecs.containsKey(expectedId));

  InputOperatorSpec inputOpSpec = inputOpSpecs.get(expectedId);
  assertEquals(expectedId, inputOpSpec.getStreamId());
  assertTrue(inputOpSpec.getKeySerde() instanceof NoOpSerde);
  assertTrue(inputOpSpec.getValueSerde() instanceof NoOpSerde);
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());

  InputOperatorSpec originInputSpec = inputOpSpecs.get(testInputDescriptor.getStreamId());
  // Fetch the first registered spec once, then type-check and cast the same object.
  Object firstRegisteredSpec = originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
  assertTrue(firstRegisteredSpec instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) firstRegisteredSpec;

  // JUnit's assertEquals takes (expected, actual); keep that order so failure messages read correctly.
  assertEquals(expectedId, reparOpSpec.getOpId());
  assertEquals(keyFn, reparOpSpec.getKeyFunction());
  assertEquals(valueFn, reparOpSpec.getValueFunction());
  assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}
 
Example 3
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an application containing a single {@code partitionBy} call, clones the
 * resulting operator spec graph, and delegates the comparison of the original and
 * the clone to {@code OperatorSpecTestUtils.assertClonedGraph}.
 */
@Test
public void testCopy() {
  StreamApplicationDescriptorImpl descriptor = new StreamApplicationDescriptorImpl(app -> {
    MessageStream source = app.getInputStream(testInputDescriptor);
    source.partitionBy(msg -> msg.toString(), msg -> msg, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());

  OperatorSpecGraph originalGraph = descriptor.getOperatorSpecGraph();
  OperatorSpecGraph copiedGraph = originalGraph.clone();

  OperatorSpecTestUtils.assertClonedGraph(originalGraph, copiedGraph);
}
 
Example 4
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when a {@code ScheduledMapFn}
 * instance is passed to {@code partitionBy} as the key function.
 */
@Test(expected = IllegalArgumentException.class)
public void testScheduledFunctionAsKeyFn() {
  ScheduledMapFn scheduledKeyFn = new ScheduledMapFn();
  new StreamApplicationDescriptorImpl(app -> {
    MessageStream<Object> source = app.getInputStream(testInputDescriptor);
    source.partitionBy(scheduledKeyFn, msg -> msg, mock(KVSerde.class), "parByKey");
  }, getConfig());
}
 
Example 5
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when a {@code WatermarkMapFn}
 * instance is passed to {@code partitionBy} as the key function.
 */
@Test(expected = IllegalArgumentException.class)
public void testWatermarkFunctionAsKeyFn() {
  WatermarkMapFn watermarkKeyFn = new WatermarkMapFn();
  new StreamApplicationDescriptorImpl(app -> {
    MessageStream<Object> source = app.getInputStream(testInputDescriptor);
    source.partitionBy(watermarkKeyFn, msg -> msg, mock(KVSerde.class), "parByKey");
  }, getConfig());
}
 
Example 6
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when a {@code ScheduledMapFn}
 * instance is passed to {@code partitionBy} as the value function.
 */
@Test(expected = IllegalArgumentException.class)
public void testScheduledFunctionAsValueFn() {
  ScheduledMapFn scheduledValueFn = new ScheduledMapFn();
  new StreamApplicationDescriptorImpl(app -> {
    MessageStream<Object> source = app.getInputStream(testInputDescriptor);
    source.partitionBy(msg -> msg.toString(), scheduledValueFn, mock(KVSerde.class), "parByKey");
  }, getConfig());
}
 
Example 7
Source File: TestPartitionByOperatorSpec.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when a {@code WatermarkMapFn}
 * instance is passed to {@code partitionBy} as the value function.
 */
@Test(expected = IllegalArgumentException.class)
public void testWatermarkFunctionAsValueFn() {
  WatermarkMapFn watermarkValueFn = new WatermarkMapFn();
  new StreamApplicationDescriptorImpl(app -> {
    MessageStream<Object> source = app.getInputStream(testInputDescriptor);
    source.partitionBy(msg -> msg.toString(), watermarkValueFn, mock(KVSerde.class), "parByKey");
  }, getConfig());
}
 
Example 8
Source File: ExecutionPlannerTestBase.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code StreamApplication} whose only operation is a {@code partitionBy}
 * (id {@code "p1"}) on the stream obtained from {@code input1Descriptor}, keyed by
 * each message's key and carrying each message's value, with a mocked {@code KVSerde}.
 *
 * @return the application definition described above
 */
StreamApplication getRepartitionOnlyStreamApplication() {
  StreamApplication repartitionOnly = descriptor -> {
    MessageStream<KV<String, Object>> source = descriptor.getInputStream(input1Descriptor);
    source.partitionBy(KV::getKey, KV::getValue, mock(KVSerde.class), "p1");
  };
  return repartitionOnly;
}
 
Example 9
Source File: RepartitionJoinWindowApp.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Describes the application: reads page views and ad clicks from two configured
 * input topics, repartitions both by view id, joins them, repartitions the joined
 * records by user id, counts the records per keyed session window, and sends each
 * (user id, count) pair to the configured output topic. The stream ids of the three
 * repartitioned streams are appended to {@code intermediateStreamIds}.
 *
 * @param appDescriptor descriptor supplying the config and the input streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
  // before the application is run
  Config config = appDescriptor.getConfig();
  String inputTopic1 = config.get(INPUT_TOPIC_1_CONFIG_KEY);
  String inputTopic2 = config.get(INPUT_TOPIC_2_CONFIG_KEY);
  String outputTopic = config.get(OUTPUT_TOPIC_CONFIG_KEY);
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id1 = ksd.getInputDescriptor(inputTopic1, new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<AdClick> id2 = ksd.getInputDescriptor(inputTopic2, new JsonSerdeV2<>(AdClick.class));

  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id1);
  MessageStream<AdClick> adClicks = appDescriptor.getInputStream(id2);

  // Repartition both inputs by view id before joining them.
  MessageStream<KV<String, PageView>> pageViewsRepartitionedByViewId = pageViews
      .partitionBy(PageView::getViewId, pv -> pv,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class)), "pageViewsByViewId");

  MessageStream<PageView> pageViewsRepartitionedByViewIdValueOnly = pageViewsRepartitionedByViewId.map(KV::getValue);

  MessageStream<KV<String, AdClick>> adClicksRepartitionedByViewId = adClicks
      .partitionBy(AdClick::getViewId, ac -> ac,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(AdClick.class)), "adClicksByViewId");
  MessageStream<AdClick> adClicksRepartitionedByViewIdValueOnly = adClicksRepartitionedByViewId.map(KV::getValue);

  MessageStream<UserPageAdClick> userPageAdClicks = pageViewsRepartitionedByViewIdValueOnly
      .join(adClicksRepartitionedByViewIdValueOnly, new UserPageViewAdClicksJoiner(),
          new StringSerde(), new JsonSerdeV2<>(PageView.class), new JsonSerdeV2<>(AdClick.class),
          Duration.ofMinutes(1), "pageViewAdClickJoin");

  // Repartition the joined records by user id before the per-user session window.
  MessageStream<KV<String, UserPageAdClick>> userPageAdClicksByUserId = userPageAdClicks
      .partitionBy(UserPageAdClick::getUserId, upac -> upac,
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userPageAdClicksByUserId");

  userPageAdClicksByUserId.map(KV::getValue)
      .window(Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)), "userAdClickWindow")
      // Emit the window key together with the number of messages in the pane, as a string.
      .map(windowPane -> KV.of(windowPane.getKey().getKey(), String.valueOf(windowPane.getMessage().size())))
      .sink((message, messageCollector, taskCoordinator) -> {
        taskCoordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
        // NOTE(review): the system name is hard-coded as "kafka" here while the descriptors above use SYSTEM
        // - confirm the two are meant to match.
        messageCollector.send(
            new OutgoingMessageEnvelope(
                new SystemStream("kafka", outputTopic), null, message.getKey(), message.getValue()));
      });


  // Record the stream ids of the three repartitioned streams.
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsRepartitionedByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksRepartitionedByViewId).getStreamId());
  intermediateStreamIds.add(((IntermediateMessageStreamImpl) userPageAdClicksByUserId).getStreamId());
}