Java Code Examples for org.apache.samza.serializers.KVSerde#of()

The following examples show how to use org.apache.samza.serializers.KVSerde#of(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ImpulseTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an impulse input stream for the given portable transform.
 *
 * <p>The transform's output id is used both as the system name and as the stream id of the
 * input descriptor that is registered with the translation context.
 *
 * @param transform the transform whose output id identifies the impulse stream
 * @param pipeline the pipeline being translated (not used by this method)
 * @param ctx the context the input message stream is registered with
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {

  final String streamId = ctx.getOutputId(transform);
  final String factoryClassName = SamzaImpulseSystemFactory.class.getName();
  final GenericSystemDescriptor impulseSystem =
      new GenericSystemDescriptor(streamId, factoryClassName);

  // The KvCoder is needed here for Samza not to crop the key.
  // NOTE(review): the key serde is left as a raw type, presumably so the result can be
  // assigned to the wildcard-keyed Serde type below — confirm before adding a diamond.
  final Serde<KV<?, OpMessage<byte[]>>> impulseSerde =
      KVSerde.of(new NoOpSerde(), new NoOpSerde<>());
  final GenericInputDescriptor<KV<?, OpMessage<byte[]>>> impulseInput =
      impulseSystem.getInputDescriptor(streamId, impulseSerde);

  ctx.registerInputMessageStream(streamId, impulseInput);
}
 
Example 2
Source File: TransactionalStateMultiStoreIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes the task application: one Kafka input stream, a task factory that creates
 * {@code MyTask} instances, and two {@link RocksDbTableDescriptor} tables that each declare a
 * changelog stream with a changelog replication factor of 1.
 *
 * @param appDescriptor descriptor the input stream, task factory and tables are added to
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(INPUT_SYSTEM);
  // A single String/String serde is shared by the input stream and both tables.
  KVSerde<String, String> stringKvSerde = KVSerde.of(new StringSerde(), new StringSerde());

  KafkaInputDescriptor<KV<String, String>> input =
      kafka.getInputDescriptor(INPUT_TOPIC, stringKvSerde);

  RocksDbTableDescriptor<String, String> firstStore =
      new RocksDbTableDescriptor<>(STORE_1_NAME, stringKvSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);

  RocksDbTableDescriptor<String, String> secondStore =
      new RocksDbTableDescriptor<>(STORE_2_NAME, stringKvSerde)
          .withChangelogStream(STORE_2_CHANGELOG)
          .withChangelogReplicationFactor(1);

  appDescriptor
      .withInputStream(input)
      .withTaskFactory((StreamTaskFactory) () -> new MyTask())
      .withTable(firstStore)
      .withTable(secondStore);
}
 
Example 3
Source File: BroadcastExample.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a stream application that reads {@code pageViewEvent} from the {@code tracking}
 * Kafka system and routes each message to one of three output streams according to its key
 * ({@code key1}, {@code key2} or {@code key3}).
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> pageViewSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor tracking = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<KV<String, PageViewEvent>> source =
      tracking.getInputDescriptor("pageViewEvent", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> firstSink =
      tracking.getOutputDescriptor("outStream1", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> secondSink =
      tracking.getOutputDescriptor("outStream2", pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> thirdSink =
      tracking.getOutputDescriptor("outStream3", pageViewSerde);

  MessageStream<KV<String, PageViewEvent>> pageViews = appDescriptor.getInputStream(source);

  // Each branch builds its filter first and only then resolves its output stream.
  MessageStream<KV<String, PageViewEvent>> key1Views = pageViews.filter(kv -> kv.key.equals("key1"));
  key1Views.sendTo(appDescriptor.getOutputStream(firstSink));

  MessageStream<KV<String, PageViewEvent>> key2Views = pageViews.filter(kv -> kv.key.equals("key2"));
  key2Views.sendTo(appDescriptor.getOutputStream(secondSink));

  MessageStream<KV<String, PageViewEvent>> key3Views = pageViews.filter(kv -> kv.key.equals("key3"));
  key3Views.sendTo(appDescriptor.getOutputStream(thirdSink));
}
 
Example 4
Source File: FilterExample.java    From samza-hello-samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a stream application that copies page views from the input stream to the output
 * stream, dropping every message whose {@code userId} equals {@code INVALID_USER_ID}.
 *
 * @param appDescriptor descriptor used to set the default system and obtain the streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // The same key/value serde is used for reading and for writing.
  KVSerde<String, PageView> pageViewSerde =
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageView.class));
  KafkaInputDescriptor<KV<String, PageView>> source =
      kafka.getInputDescriptor(INPUT_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<KV<String, PageView>> sink =
      kafka.getOutputDescriptor(OUTPUT_STREAM_ID, pageViewSerde);

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<KV<String, PageView>> allViews = appDescriptor.getInputStream(source);
  OutputStream<KV<String, PageView>> validViews = appDescriptor.getOutputStream(sink);

  allViews
      .filter(entry -> !INVALID_USER_ID.equals(entry.value.userId))
      .sendTo(validViews);
}
 
Example 5
Source File: RepartitionWindowApp.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a stream application that takes the value of each input message, repartitions the
 * page views by user id, counts messages per key in a keyed session window created with a
 * 3-second duration (initial value 0, incremented by one per message), and sends each window
 * key together with its count, both as strings, to the output stream.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageView> inputSerde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageView.class));
  KVSerde<String, String> outputSerde = KVSerde.of(new StringSerde(), new StringSerde());
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<KV<String, PageView>> source =
      kafka.getInputDescriptor(INPUT_TOPIC, inputSerde);
  KafkaOutputDescriptor<KV<String, String>> sink =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);

  appDescriptor.getInputStream(source)
      .map(KV::getValue)
      .partitionBy(PageView::getUserId, view -> view, inputSerde, "p1")
      .window(
          Windows.keyedSessionWindow(
              keyed -> keyed.getKey(),
              Duration.ofSeconds(3),
              () -> 0,
              (keyed, count) -> count + 1,
              new StringSerde("UTF-8"),
              new IntegerSerde()),
          "w1")
      .map(pane -> KV.of(pane.getKey().getKey().toString(), String.valueOf(pane.getMessage())))
      .sendTo(appDescriptor.getOutputStream(sink));
}
 
Example 6
Source File: TestJoinOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application that joins an input stream with itself and expects creating the stream
 * operator task for it to fail with a {@link SamzaException}.
 */
@Test(expected = SamzaException.class)
public void joinWithSelfThrowsException() throws Exception {
  Map<String, String> jobProperties = new HashMap<>();
  jobProperties.put("job.name", "jobName");
  jobProperties.put("job.id", "jobId");
  StreamTestUtils.addStreamConfigs(jobProperties, "inStream", "insystem", "instream");
  Config jobConfig = new MapConfig(jobProperties);

  StreamApplicationDescriptorImpl selfJoinApp = new StreamApplicationDescriptorImpl(appDesc -> {
    IntegerSerde intSerde = new IntegerSerde();
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(intSerde, intSerde);
    GenericSystemDescriptor inSystem = new GenericSystemDescriptor("insystem", "mockFactoryClassName");
    GenericInputDescriptor<KV<Integer, Integer>> inDescriptor =
        inSystem.getInputDescriptor("inStream", intPairSerde);

    MessageStream<KV<Integer, Integer>> numbers = appDesc.getInputStream(inDescriptor);

    // The same stream instance is used on both sides of the join.
    numbers.join(numbers, new TestJoinFunction(), intSerde, intPairSerde, intPairSerde, JOIN_TTL, "join");
  }, jobConfig);

  createStreamOperatorTask(new SystemClock(), selfJoinApp); // should throw an exception
}
 
Example 7
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the job config and a mocked context for each test: the context is stubbed to return
 * the config, a mocked task model with a single {@code kafka}/{@code integers} partition 0,
 * fresh metrics registries, and an in-memory store under the name
 * {@code jobName-jobId-window-w1}.
 */
@Before
public void setup() {
  Map<String, String> jobSettings = new HashMap<>();
  jobSettings.put("job.default.system", "kafka");
  jobSettings.put("job.name", "jobName");
  jobSettings.put("job.id", "jobId");
  this.config = new MapConfig(jobSettings);

  this.context = new MockContext();
  when(this.context.getJobContext().getConfig()).thenReturn(this.config);
  Serde windowKeySerde = new TimeSeriesKeySerde(new IntegerSerde());
  Serde windowValueSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());

  TaskModel mockTaskModel = mock(TaskModel.class);
  when(mockTaskModel.getSystemStreamPartitions())
      .thenReturn(ImmutableSet.of(new SystemStreamPartition("kafka", "integers", new Partition(0))));
  when(mockTaskModel.getTaskName())
      .thenReturn(new TaskName("task 1"));
  when(this.context.getTaskContext().getTaskModel())
      .thenReturn(mockTaskModel);
  when(this.context.getTaskContext().getTaskMetricsRegistry())
      .thenReturn(new MetricsRegistryMap());
  when(this.context.getContainerContext().getContainerMetricsRegistry())
      .thenReturn(new MetricsRegistryMap());
  // The store name combines the job name, job id and the window operator id "w1".
  when(this.context.getTaskContext().getStore("jobName-jobId-window-w1"))
      .thenReturn(new TestInMemoryStore<>(windowKeySerde, windowValueSerde));
}
 
Example 8
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application descriptor that maps the {@code integers} input through
 * {@code KVMapFunction}, counts messages in a tumbling window and forwards every window
 * result to the {@code outputSystem}/{@code outputStream} system stream.
 *
 * @param mode accumulation mode set on the window
 * @param timeDuration duration passed to the tumbling window
 * @param earlyTrigger early trigger set on the window
 * @return the application descriptor built with the test's {@code config}
 * @throws IOException declared by the signature; nothing in the visible body throws it
 */
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode, Duration timeDuration,
      Trigger<IntegerEnvelope> earlyTrigger) throws IOException {
  StreamApplication countingApp = appDesc -> {
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> integersInput =
        kafka.getInputDescriptor("integers", intPairSerde);
    MessageStream<KV<Integer, Integer>> source = appDesc.getInputStream(integersInput);

    source
        .map(new KVMapFunction())
        .window(
            Windows.<IntegerEnvelope, Integer>tumblingWindow(
                    timeDuration, () -> 0, (envelope, count) -> count + 1, new IntegerSerde())
                .setEarlyTrigger(earlyTrigger)
                .setAccumulationMode(mode),
            "w1")
        .sink((pane, collector, coordinator) -> {
          SystemStream target = new SystemStream("outputSystem", "outputStream");
          collector.send(new OutgoingMessageEnvelope(target, pane));
        });
  };

  return new StreamApplicationDescriptorImpl(countingApp, config);
}
 
Example 9
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application descriptor that groups the {@code integers} input into a tumbling
 * window and forwards every window result to the {@code outputSystem}/{@code outputStream}
 * system stream.
 *
 * @param mode accumulation mode set on the window
 * @param duration duration passed to the tumbling window
 * @param earlyTrigger early trigger set on the window
 * @return the application descriptor built with the test's {@code config}
 * @throws IOException declared by the signature; nothing in the visible body throws it
 */
private StreamApplicationDescriptorImpl getTumblingWindowStreamGraph(AccumulationMode mode,
    Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException {
  StreamApplication windowingApp = appDesc -> {
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> integersInput =
        kafka.getInputDescriptor("integers", intPairSerde);
    appDesc.getInputStream(integersInput)
        .window(
            Windows.tumblingWindow(duration, intPairSerde)
                .setEarlyTrigger(earlyTrigger)
                .setAccumulationMode(mode),
            "w1")
        .sink((pane, collector, coordinator) -> {
          SystemStream target = new SystemStream("outputSystem", "outputStream");
          collector.send(new OutgoingMessageEnvelope(target, pane));
        });
  };

  return new StreamApplicationDescriptorImpl(windowingApp, config);
}
 
Example 10
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application descriptor that groups the {@code integers} input into a session
 * window keyed by the message key and forwards every window result to the
 * {@code outputSystem}/{@code outputStream} system stream.
 *
 * @param mode accumulation mode set on the window
 * @param duration duration passed to the keyed session window
 * @return the application descriptor built with the test's {@code config}
 * @throws IOException declared by the signature; nothing in the visible body throws it
 */
private StreamApplicationDescriptorImpl getKeyedSessionWindowStreamGraph(AccumulationMode mode, Duration duration) throws IOException {
  StreamApplication sessionApp = appDesc -> {
    KVSerde<Integer, Integer> intPairSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> integersInput =
        kafka.getInputDescriptor("integers", intPairSerde);
    appDesc.getInputStream(integersInput)
        .window(
            Windows.keyedSessionWindow(KV::getKey, duration, new IntegerSerde(), intPairSerde)
                .setAccumulationMode(mode),
            "w1")
        .sink((pane, collector, coordinator) -> {
          SystemStream target = new SystemStream("outputSystem", "outputStream");
          collector.send(new OutgoingMessageEnvelope(target, pane));
        });
  };

  return new StreamApplicationDescriptorImpl(sessionApp, config);
}
 
Example 11
Source File: EventHubsInputDescriptor.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Constructs an {@link InputDescriptor} instance.
 *
 * @param streamId id of the stream
 * @param namespace namespace for the Event Hubs entity to consume from, not null
 * @param entityPath entity path for the Event Hubs entity to consume from, not null
 * @param valueSerde serde the values in the messages in the stream
 * @param systemDescriptor system descriptor this stream descriptor was obtained from
 */
EventHubsInputDescriptor(String streamId, String namespace, String entityPath, Serde valueSerde,
    SystemDescriptor systemDescriptor) {
  super(streamId, KVSerde.of(new NoOpSerde<>(), valueSerde), systemDescriptor, null);
  this.namespace = StringUtils.stripToNull(namespace);
  this.entityPath = StringUtils.stripToNull(entityPath);
  if (this.namespace == null || this.entityPath == null) {
    throw new ConfigException(String.format("Missing namespace and entity path Event Hubs input descriptor in " //
        + "system: {%s}, stream: {%s}", getSystemName(), streamId));
  }
}
 
Example 12
Source File: TestStandaloneIntegrationApplication.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a pass-through application: every message read from the input stream named by the
 * {@code input.stream.name} config entry is sent unchanged to the
 * {@code standaloneIntegrationTestKafkaOutputTopic} output stream.
 *
 * @param appDescriptor descriptor providing the config and the input/output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  String kafkaSystemName = "testSystemName";
  String sourceTopic = appDescriptor.getConfig().get("input.stream.name");
  String sinkTopic = "standaloneIntegrationTestKafkaOutputTopic";
  LOGGER.info("Publishing message from: {} to: {}.", sourceTopic, sinkTopic);
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(kafkaSystemName);

  // Keys and values use NoOpSerde on both the input and the output descriptor.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  KafkaInputDescriptor<KV<Object, Object>> source =
      kafka.getInputDescriptor(sourceTopic, passThroughSerde);
  KafkaOutputDescriptor<KV<Object, Object>> sink =
      kafka.getOutputDescriptor(sinkTopic, passThroughSerde);
  appDescriptor.getInputStream(source).sendTo(appDescriptor.getOutputStream(sink));
}
 
Example 13
Source File: QueryTranslator.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Sends the translated output of {@code node} to the sink named {@code sinkStream}.
 *
 * <p>If the sink config carries a table descriptor, the output is sent to that table.
 * Otherwise it is sent to an output stream (created once per sink source and cached in
 * {@code outputMsgStreams}); when system-event processing is enabled, the system messages of
 * every input stream are forwarded to the same output stream.
 *
 * @param queryLogicalId passed through to {@code OutputMapFunction}
 * @param logicalOpId passed through to {@code OutputMapFunction}
 * @param sinkStream key used to look up the sink config
 * @param appDesc descriptor used to obtain the output stream or table
 * @param translatorContext context holding the message stream registered for {@code node}
 * @param node relational node whose id selects the message stream to send
 * @param queryId passed through to {@code OutputMapFunction}
 * @throws SamzaException if the sink is a table and {@code appDesc.getTable} returns null
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream,
    StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> relMessages = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> outputStream =
      relMessages.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
  Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();

  // Table sink: resolve the table and send, then stop.
  if (tableDescriptor.isPresent()) {
    Table outputTable = appDesc.getTable(tableDescriptor.get());
    if (outputTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
    }
    outputStream.sendTo(outputTable);
    return;
  }

  // Stream sink.
  KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  String systemName = sinkConfig.getSystemName();
  DelegatingSystemDescriptor systemDescriptor =
      systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> outputDescriptor =
      systemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
  OutputStream sink =
      outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(outputDescriptor));
  outputStream.sendTo(sink);

  // Process system events only if the output is a stream.
  if (!sqlConfig.isProcessSystemEvents()) {
    return;
  }
  for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
    MessageStream<KV<Object, Object>> systemEvents = inputStream
        .filter(message -> message.getMetadata().isSystemMessage())
        .map(SamzaSqlInputMessage::getKeyAndMessageKV);

    systemEvents.sendTo(sink);
  }
}
 
Example 14
Source File: StreamTableJoinExample.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a stream-table join application: profiles are written to a RocksDB table keyed by
 * user id, page views are repartitioned by user id and joined with that table via
 * {@code JoinFn}, and the join results are sent to the output stream.
 *
 * @param appDescriptor descriptor used to set the default system and obtain streams and table
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  Serde<Profile> profileSerde = new JsonSerdeV2<>(Profile.class);
  Serde<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  Serde<EnrichedPageView> enrichedSerde = new JsonSerdeV2<>(EnrichedPageView.class);

  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(KAFKA_SYSTEM_NAME)
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  KafkaInputDescriptor<Profile> profileSource =
      kafka.getInputDescriptor(PROFILE_STREAM_ID, profileSerde);
  KafkaInputDescriptor<PageView> pageViewSource =
      kafka.getInputDescriptor(PAGEVIEW_STREAM_ID, pageViewSerde);
  KafkaOutputDescriptor<EnrichedPageView> enrichedSink =
      kafka.getOutputDescriptor(OUTPUT_TOPIC, enrichedSerde);

  RocksDbTableDescriptor<String, Profile> profileTableSpec =
      new RocksDbTableDescriptor<>("profile-table", KVSerde.of(new StringSerde(), profileSerde));

  appDescriptor.withDefaultSystem(kafka);

  MessageStream<Profile> profiles = appDescriptor.getInputStream(profileSource);
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewSource);
  OutputStream<EnrichedPageView> enrichedViews = appDescriptor.getOutputStream(enrichedSink);
  Table<KV<String, Profile>> profilesByUser = appDescriptor.getTable(profileTableSpec);

  // Populate the table with profiles keyed by user id.
  profiles
      .map(p -> KV.of(p.userId, p))
      .sendTo(profilesByUser);

  // Key page views by user id as well, then join them against the table.
  pageViews
      .partitionBy(view -> view.userId, view -> view, KVSerde.of(new StringSerde(), pageViewSerde), "join")
      .join(profilesByUser, new JoinFn())
      .sendTo(enrichedViews);
}
 
Example 15
Source File: ExecutionPlannerTestBase.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the shared serde, the system and stream descriptors, a spied job config built
 * from those descriptors, and the mocked stream application descriptor used by the tests.
 */
@Before
public void setUp() {
  final String mockFactory = "mockSystemFactoryClassName";
  final String partitionByStream = "jobName-jobId-partition_by-p1";
  final String broadcastStream = "jobName-jobId-broadcast-b1";

  defaultSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>());
  inputSystemDescriptor = new GenericSystemDescriptor("input-system", mockFactory);
  outputSystemDescriptor = new GenericSystemDescriptor("output-system", mockFactory);
  intermediateSystemDescriptor = new GenericSystemDescriptor("intermediate-system", mockFactory);
  input1Descriptor = inputSystemDescriptor.getInputDescriptor("input1", defaultSerde);
  input2Descriptor = inputSystemDescriptor.getInputDescriptor("input2", defaultSerde);
  outputDescriptor = outputSystemDescriptor.getOutputDescriptor("output", defaultSerde);
  // Intermediate and broadcast descriptors use their stream id as the physical name too.
  intermediateInputDescriptor = intermediateSystemDescriptor
      .getInputDescriptor(partitionByStream, defaultSerde)
      .withPhysicalName(partitionByStream);
  intermediateOutputDescriptor = intermediateSystemDescriptor
      .getOutputDescriptor(partitionByStream, defaultSerde)
      .withPhysicalName(partitionByStream);
  broadcastInputDesriptor = intermediateSystemDescriptor
      .getInputDescriptor(broadcastStream, defaultSerde)
      .withPhysicalName(broadcastStream);

  Map<String, String> jobProperties = new HashMap<>();
  jobProperties.put(JobConfig.JOB_NAME, "jobName");
  jobProperties.put(JobConfig.JOB_ID, "jobId");
  jobProperties.putAll(input1Descriptor.toConfig());
  jobProperties.putAll(input2Descriptor.toConfig());
  jobProperties.putAll(outputDescriptor.toConfig());
  jobProperties.putAll(inputSystemDescriptor.toConfig());
  jobProperties.putAll(outputSystemDescriptor.toConfig());
  jobProperties.putAll(intermediateSystemDescriptor.toConfig());
  jobProperties.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystemDescriptor.getSystemName());
  mockConfig = spy(new MapConfig(jobProperties));

  mockStreamAppDesc = new StreamApplicationDescriptorImpl(getRepartitionJoinStreamApplication(), mockConfig);
}
 
Example 16
Source File: EventHubsOutputDescriptor.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Constructs an {@link OutputDescriptor} instance.
 *
 * @param streamId id of the stream
 * @param namespace namespace for the Event Hubs entity to produce to, not null
 * @param entityPath entity path for the Event Hubs entity to produce to, not null
 * @param valueSerde serde the values in the messages in the stream
 * @param systemDescriptor system descriptor this stream descriptor was obtained from
 */
EventHubsOutputDescriptor(String streamId, String namespace, String entityPath, Serde valueSerde,
    SystemDescriptor systemDescriptor) {
  super(streamId, KVSerde.of(new NoOpSerde<>(), valueSerde), systemDescriptor);
  this.namespace = StringUtils.stripToNull(namespace);
  this.entityPath = StringUtils.stripToNull(entityPath);
  if (this.namespace == null || this.entityPath == null) {
    throw new ConfigException(String.format("Missing namespace and entity path Event Hubs output descriptor in "
        + "system: {%s}, stream: {%s}", getSystemName(), streamId));
  }
}
 
Example 17
Source File: TestLocalTableEndToEnd.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Describes an application in which two profile streams populate one shared in-memory table
 * and two page-view streams are each repartitioned by member id and joined against that table.
 *
 * <p>Every profile sent to the table is also recorded in {@code sentToProfileTable1} or
 * {@code sentToProfileTable2}, and every join result is collected in {@code joinedPageViews1}
 * or {@code joinedPageViews2}.
 *
 * @param appDesc descriptor used to obtain the table and the input streams
 */
@Override
public void describe(StreamApplicationDescriptor appDesc) {
  KVSerde<Integer, Profile> profileKVSerde = KVSerde.of(new IntegerSerde(), new ProfileJsonSerde());
  KVSerde<Integer, PageView> pageViewKVSerde = KVSerde.of(new IntegerSerde(), new PageViewJsonSerde());

  PageViewToProfileJoinFunction joinFn1 = new PageViewToProfileJoinFunction();
  PageViewToProfileJoinFunction joinFn2 = new PageViewToProfileJoinFunction();

  // Diamond instead of a raw type, so the table's key/value types are checked by the compiler.
  Table<KV<Integer, Profile>> profileTable =
      appDesc.getTable(new InMemoryTableDescriptor<>("t1", profileKVSerde));

  DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test");
  GenericInputDescriptor<Profile> profileISD1 = ksd.getInputDescriptor("Profile1", new NoOpSerde<>());
  GenericInputDescriptor<Profile> profileISD2 = ksd.getInputDescriptor("Profile2", new NoOpSerde<>());
  MessageStream<Profile> profileStream1 = appDesc.getInputStream(profileISD1);
  MessageStream<Profile> profileStream2 = appDesc.getInputStream(profileISD2);

  // KV.of(...) yields a typed KV rather than the raw KV the constructor call produced.
  profileStream1
      .map(m -> {
        sentToProfileTable1.add(m);
        return KV.of(m.getMemberId(), m);
      })
      .sendTo(profileTable);
  profileStream2
      .map(m -> {
        sentToProfileTable2.add(m);
        return KV.of(m.getMemberId(), m);
      })
      .sendTo(profileTable);

  GenericInputDescriptor<PageView> pageViewISD1 = ksd.getInputDescriptor("PageView1", new NoOpSerde<PageView>());
  GenericInputDescriptor<PageView> pageViewISD2 = ksd.getInputDescriptor("PageView2", new NoOpSerde<PageView>());
  MessageStream<PageView> pageViewStream1 = appDesc.getInputStream(pageViewISD1);
  MessageStream<PageView> pageViewStream2 = appDesc.getInputStream(pageViewISD2);

  pageViewStream1
      .partitionBy(PageView::getMemberId, v -> v, pageViewKVSerde, "p1")
      .join(profileTable, joinFn1)
      .sink((m, collector, coordinator) -> joinedPageViews1.add(m));

  pageViewStream2
      .partitionBy(PageView::getMemberId, v -> v, pageViewKVSerde, "p2")
      .join(profileTable, joinFn2)
      .sink((m, collector, coordinator) -> joinedPageViews2.add(m));
}
 
Example 18
Source File: TestJobGraphJsonGenerator.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Plans an application that repartitions page views by country, counts them in a 10-second
 * keyed tumbling window and writes the counts to {@code PageViewCount}, then checks the
 * generated job-graph JSON: stream and operator counts, and the output stream ids of the
 * partition-by and send-to operators.
 *
 * @throws Exception if planning or JSON deserialization fails
 */
@Test
public void testRepartitionedWindowStreamApplication() throws Exception {
  Map<String, String> configMap = new HashMap<>();
  configMap.put(JobConfig.JOB_NAME, "test-app");
  configMap.put(JobConfig.JOB_DEFAULT_SYSTEM, "test-system");
  StreamTestUtils.addStreamConfigs(configMap, "PageView", "hdfs", "hdfs:/user/dummy/PageViewEvent");
  StreamTestUtils.addStreamConfigs(configMap, "PageViewCount", "kafka", "PageViewCount");
  Config config = new MapConfig(configMap);

  // set up external partition count
  Map<String, Integer> system1Map = new HashMap<>();
  system1Map.put("hdfs:/user/dummy/PageViewEvent", 512);
  Map<String, Integer> system2Map = new HashMap<>();
  system2Map.put("PageViewCount", 16);

  SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
  SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
  SystemAdmins systemAdmins = mock(SystemAdmins.class);
  when(systemAdmins.getSystemAdmin("hdfs")).thenReturn(systemAdmin1);
  when(systemAdmins.getSystemAdmin("kafka")).thenReturn(systemAdmin2);
  StreamManager streamManager = new StreamManager(systemAdmins);

  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    KVSerde<String, PageViewEvent> pvSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class));
    GenericSystemDescriptor isd = new GenericSystemDescriptor("hdfs", "mockSystemFactoryClass");
    GenericInputDescriptor<KV<String, PageViewEvent>> pageView = isd.getInputDescriptor("PageView", pvSerde);

    KVSerde<String, Long> pvcSerde = KVSerde.of(new StringSerde(), new LongSerde());
    GenericSystemDescriptor osd = new GenericSystemDescriptor("kafka", "mockSystemFactoryClass");
    GenericOutputDescriptor<KV<String, Long>> pageViewCount = osd.getOutputDescriptor("PageViewCount", pvcSerde);

    MessageStream<KV<String, PageViewEvent>> inputStream = appDesc.getInputStream(pageView);
    OutputStream<KV<String, Long>> outputStream = appDesc.getOutputStream(pageViewCount);
    inputStream
        .partitionBy(kv -> kv.getValue().getCountry(), kv -> kv.getValue(), pvSerde, "keyed-by-country")
        .window(Windows.keyedTumblingWindow(kv -> kv.getValue().getCountry(),
            Duration.ofSeconds(10L), () -> 0L, (m, c) -> c + 1L, new StringSerde(), new LongSerde()),
            "count-by-country")
        .map(pane -> new KV<>(pane.getKey().getKey(), pane.getMessage()))
        .sendTo(outputStream);
  }, config);

  ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
  ExecutionPlan plan = planner.plan(graphSpec);
  String json = plan.getPlanAsJson();
  System.out.println(json);

  // deserialize
  ObjectMapper mapper = new ObjectMapper();
  JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
  JobGraphJsonGenerator.OperatorGraphJson operatorGraphJson = nodes.jobs.get(0).operatorGraph;
  assertEquals(2, operatorGraphJson.inputStreams.size());
  assertEquals(4, operatorGraphJson.operators.size());
  assertEquals(1, nodes.sourceStreams.size());
  assertEquals(1, nodes.sinkStreams.size());
  assertEquals(1, nodes.intermediateStreams.size());

  // verify partitionBy op output to the intermediate stream of the same id
  // (expected value first, so a failure message labels expected/actual correctly)
  assertEquals("test-app-1-partition_by-keyed-by-country",
      operatorGraphJson.operators.get("test-app-1-partition_by-keyed-by-country").get("outputStreamId"));
  assertEquals("PageViewCount",
      operatorGraphJson.operators.get("test-app-1-send_to-5").get("outputStreamId"));
}
 
Example 19
Source File: TestJobGraphJsonGenerator.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes the stream specs, serde, system/stream/table descriptors and a spied job config,
 * then stubs a mocked {@code JobNode} to return the input edges, output edges, config, job
 * name/id and tables built here.
 */
@Before
public void setUp() {
  final String inputSystem = "input-system";
  final String outputSystem = "output-system";
  final String intermediateSystem = "intermediate-system";
  final String mockFactory = "mockSystemFactoryClassName";

  input1Spec = new StreamSpec("input1", "input1", inputSystem);
  input2Spec = new StreamSpec("input2", "input2", inputSystem);
  outputSpec = new StreamSpec("output", "output", outputSystem);
  repartitionSpec =
      new StreamSpec("jobName-jobId-partition_by-p1", "partition_by-p1", intermediateSystem);

  defaultSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>());
  inputSystemDescriptor = new GenericSystemDescriptor(inputSystem, mockFactory);
  outputSystemDescriptor = new GenericSystemDescriptor(outputSystem, mockFactory);
  intermediateSystemDescriptor = new GenericSystemDescriptor(intermediateSystem, mockFactory);
  input1Descriptor = inputSystemDescriptor.getInputDescriptor("input1", defaultSerde);
  input2Descriptor = inputSystemDescriptor.getInputDescriptor("input2", defaultSerde);
  outputDescriptor = outputSystemDescriptor.getOutputDescriptor("output", defaultSerde);
  table1Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table1", defaultSerde);
  table2Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table2", defaultSerde);

  Map<String, String> jobProperties = new HashMap<>();
  jobProperties.put(JobConfig.JOB_NAME, "jobName");
  jobProperties.put(JobConfig.JOB_ID, "jobId");
  mockConfig = spy(new MapConfig(jobProperties));

  mockJobNode = mock(JobNode.class);
  StreamEdge firstInputEdge = new StreamEdge(input1Spec, false, false, mockConfig);
  StreamEdge secondInputEdge = new StreamEdge(input2Spec, false, false, mockConfig);
  StreamEdge sinkEdge = new StreamEdge(outputSpec, false, false, mockConfig);
  StreamEdge intermediateEdge = new StreamEdge(repartitionSpec, true, false, mockConfig);

  // The repartition edge is registered as both an input and an output of the job node.
  Map<String, StreamEdge> inEdges = new HashMap<>();
  inEdges.put(input1Descriptor.getStreamId(), firstInputEdge);
  inEdges.put(input2Descriptor.getStreamId(), secondInputEdge);
  inEdges.put(repartitionSpec.getId(), intermediateEdge);
  Map<String, StreamEdge> outEdges = new HashMap<>();
  outEdges.put(outputDescriptor.getStreamId(), sinkEdge);
  outEdges.put(repartitionSpec.getId(), intermediateEdge);

  when(mockJobNode.getInEdges()).thenReturn(inEdges);
  when(mockJobNode.getOutEdges()).thenReturn(outEdges);
  when(mockJobNode.getConfig()).thenReturn(mockConfig);
  when(mockJobNode.getJobName()).thenReturn("jobName");
  when(mockJobNode.getJobId()).thenReturn("jobId");
  when(mockJobNode.getJobNameAndId()).thenReturn(JobNode.createJobNameAndId("jobName", "jobId"));

  Map<String, TableDescriptor> tablesById = new HashMap<>();
  tablesById.put(table1Descriptor.getTableId(), table1Descriptor);
  tablesById.put(table2Descriptor.getTableId(), table2Descriptor);
  when(mockJobNode.getTables()).thenReturn(tablesById);
}
 
Example 20
Source File: KinesisInputDescriptor.java    From samza with Apache License 2.0 2 votes vote down vote up
/**
 * Constructs an {@link InputDescriptor} instance whose messages use a {@link NoOpSerde} for the
 * key and {@code valueSerde} for the value.
 *
 * @param streamId id of the stream
 * @param valueSerde serde for the values in the messages in the stream
 * @param systemDescriptor system descriptor this stream descriptor was obtained from
 * @param <T> type of the values handled by {@code valueSerde}
 */
<T> KinesisInputDescriptor(String streamId, Serde<T> valueSerde, SystemDescriptor systemDescriptor) {
  // The fourth superclass-constructor argument is passed as null.
  // NOTE(review): presumably an optional transformer — confirm against the superclass.
  super(streamId, KVSerde.of(new NoOpSerde<>(), valueSerde), systemDescriptor, null);
}