org.apache.samza.system.descriptors.GenericSystemDescriptor Java Examples

The following examples show how to use org.apache.samza.system.descriptors.GenericSystemDescriptor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ImpulseTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an input message stream for the given transform, backed by a
 * {@link SamzaImpulseSystemFactory} system. The transform's output id is used as both
 * the system name and the stream id.
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final String impulseId = ctx.getOutputId(transform);

  // The KvCoder is needed here for Samza not to crop the key.
  final Serde<KV<?, OpMessage<byte[]>>> impulseSerde =
      KVSerde.of(new NoOpSerde(), new NoOpSerde<>());

  final GenericSystemDescriptor impulseSystem =
      new GenericSystemDescriptor(impulseId, SamzaImpulseSystemFactory.class.getName());
  final GenericInputDescriptor<KV<?, OpMessage<byte[]>>> impulseInput =
      impulseSystem.getInputDescriptor(impulseId, impulseSerde);

  ctx.registerInputMessageStream(impulseId, impulseInput);
}
 
Example #2
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the input operators are exposed in the same order in which the
 * corresponding input streams were requested by the application.
 */
@Test
public void testGetInputStreamPreservesInsertionOrder() {
  Config mockConfig = getConfig();

  String testStreamId1 = "test-stream-1";
  String testStreamId2 = "test-stream-2";
  String testStreamId3 = "test-stream-3";

  GenericSystemDescriptor sd = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    appDesc.getInputStream(sd.getInputDescriptor(testStreamId1, mock(Serde.class)));
    appDesc.getInputStream(sd.getInputDescriptor(testStreamId2, mock(Serde.class)));
    appDesc.getInputStream(sd.getInputDescriptor(testStreamId3, mock(Serde.class)));
  }, mockConfig);

  // Copy the map values into a list so their iteration order can be checked by index.
  List<InputOperatorSpec> inputSpecs = new ArrayList<>(streamAppDesc.getInputOperators().values());

  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(3, inputSpecs.size());
  assertEquals(testStreamId1, inputSpecs.get(0).getStreamId());
  assertEquals(testStreamId2, inputSpecs.get(1).getStreamId());
  assertEquals(testStreamId3, inputSpecs.get(2).getStreamId());
}
 
Example #3
Source File: TestJoinOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application that joins an input stream with itself and expects a
 * {@link SamzaException} to be raised.
 */
@Test(expected = SamzaException.class)
public void joinWithSelfThrowsException() throws Exception {
  Map<String, String> jobProps = new HashMap<>();
  jobProps.put("job.name", "jobName");
  jobProps.put("job.id", "jobId");
  StreamTestUtils.addStreamConfigs(jobProps, "inStream", "insystem", "instream");
  Config jobConfig = new MapConfig(jobProps);

  StreamApplicationDescriptorImpl selfJoinApp = new StreamApplicationDescriptorImpl(appDesc -> {
    IntegerSerde intSerde = new IntegerSerde();
    KVSerde<Integer, Integer> messageSerde = KVSerde.of(intSerde, intSerde);
    GenericSystemDescriptor inSystem = new GenericSystemDescriptor("insystem", "mockFactoryClassName");
    GenericInputDescriptor<KV<Integer, Integer>> inDescriptor =
        inSystem.getInputDescriptor("inStream", messageSerde);

    MessageStream<KV<Integer, Integer>> source = appDesc.getInputStream(inDescriptor);

    // Both sides of the join are the very same stream instance.
    source.join(source, new TestJoinFunction(), intSerde, messageSerde, messageSerde, JOIN_TTL, "join");
  }, jobConfig);

  // should throw an exception
  createStreamOperatorTask(new SystemClock(), selfJoinApp);
}
 
Example #4
Source File: TestJoinOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application that joins the "inStream" and "inStream2" inputs using {@code joinFn}
 * and sends every joined message to the "outputSystem"/"outputStream" system stream.
 *
 * @param joinFn the join function applied to the two input streams
 * @return the application descriptor describing the join
 */
private StreamApplicationDescriptorImpl getTestJoinStreamGraph(TestJoinFunction joinFn) throws IOException {
  Map<String, String> jobProps = new HashMap<>();
  jobProps.put("job.name", "jobName");
  jobProps.put("job.id", "jobId");
  StreamTestUtils.addStreamConfigs(jobProps, "inStream", "insystem", "instream");
  StreamTestUtils.addStreamConfigs(jobProps, "inStream2", "insystem", "instream2");
  Config jobConfig = new MapConfig(jobProps);

  return new StreamApplicationDescriptorImpl(appDesc -> {
    IntegerSerde intSerde = new IntegerSerde();
    KVSerde<Integer, Integer> messageSerde = KVSerde.of(intSerde, intSerde);
    GenericSystemDescriptor inSystem = new GenericSystemDescriptor("insystem", "mockFactoryClassName");

    GenericInputDescriptor<KV<Integer, Integer>> leftDescriptor =
        inSystem.getInputDescriptor("inStream", messageSerde);
    GenericInputDescriptor<KV<Integer, Integer>> rightDescriptor =
        inSystem.getInputDescriptor("inStream2", messageSerde);

    MessageStream<KV<Integer, Integer>> left = appDesc.getInputStream(leftDescriptor);
    MessageStream<KV<Integer, Integer>> right = appDesc.getInputStream(rightDescriptor);

    left
        .join(right, joinFn, intSerde, messageSerde, messageSerde, JOIN_TTL, "j1")
        .sink((joined, collector, coordinator) ->
            collector.send(new OutgoingMessageEnvelope(
                new SystemStream("outputSystem", "outputStream"), joined)));
  }, jobConfig);
}
 
Example #5
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Requests an output stream whose descriptor carries a plain (non key-value) serde and checks
 * the registered stream id, descriptor, key serde type and value serde.
 */
@Test
public void testGetOutputStreamWithValueSerde() {
  String outputId = "test-stream-1";
  Serde valueSerde = mock(Serde.class);
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericOutputDescriptor outputDescriptor = system.getOutputDescriptor(outputId, valueSerde);

  StreamApplicationDescriptorImpl appDescriptor =
      new StreamApplicationDescriptorImpl(appDesc -> appDesc.getOutputStream(outputDescriptor), getConfig());

  OutputStreamImpl<TestMessageEnvelope> registered = appDescriptor.getOutputStreams().get(outputId);
  assertEquals(outputId, registered.getStreamId());
  assertEquals(outputDescriptor, appDescriptor.getOutputDescriptors().get(outputId));
  assertTrue(registered.getKeySerde() instanceof NoOpSerde);
  assertEquals(valueSerde, registered.getValueSerde());
}
 
Example #6
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application that applies a keyed tumbling window (keyed by {@code KV::getKey}) to the
 * "integers" input and sends every window output to the "outputSystem"/"outputStream" stream.
 *
 * @param mode the accumulation mode set on the window
 * @param duration the tumbling window duration
 * @param earlyTrigger the early trigger set on the window
 * @return the application descriptor built against the {@code config} field
 */
private StreamApplicationDescriptorImpl getKeyedTumblingWindowStreamGraph(AccumulationMode mode,
    Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException {

  return new StreamApplicationDescriptorImpl(appDesc -> {
    KVSerde<Integer, Integer> messageSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> integersDescriptor =
        kafka.getInputDescriptor("integers", messageSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersDescriptor);

    integers
        .window(Windows.keyedTumblingWindow(KV::getKey, duration, new IntegerSerde(), messageSerde)
            .setEarlyTrigger(earlyTrigger).setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(new OutgoingMessageEnvelope(
                new SystemStream("outputSystem", "outputStream"), pane)));
  }, config);
}
 
Example #7
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Requests an output stream whose descriptor carries a key-value serde and checks that the
 * registered stream exposes the key and value serdes returned by that key-value serde.
 */
@Test
public void testGetOutputStreamWithKeyValueSerde() {
  String outputId = "test-stream-1";

  // Stub a KVSerde that hands out distinct key and value serde mocks.
  Serde keySerde = mock(Serde.class);
  Serde valueSerde = mock(Serde.class);
  KVSerde kvSerde = mock(KVSerde.class);
  doReturn(keySerde).when(kvSerde).getKeySerde();
  doReturn(valueSerde).when(kvSerde).getValueSerde();

  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericOutputDescriptor outputDescriptor = system.getOutputDescriptor(outputId, kvSerde);

  StreamApplicationDescriptorImpl appDescriptor =
      new StreamApplicationDescriptorImpl(appDesc -> appDesc.getOutputStream(outputDescriptor), getConfig());

  OutputStreamImpl<TestMessageEnvelope> registered = appDescriptor.getOutputStreams().get(outputId);
  assertEquals(outputId, registered.getStreamId());
  assertEquals(outputDescriptor, appDescriptor.getOutputDescriptors().get(outputId));
  assertEquals(keySerde, registered.getKeySerde());
  assertEquals(valueSerde, registered.getValueSerde());
}
 
Example #8
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application that applies a (non-keyed) tumbling window to the "integers" input and
 * sends every window output to the "outputSystem"/"outputStream" stream.
 *
 * @param mode the accumulation mode set on the window
 * @param duration the tumbling window duration
 * @param earlyTrigger the early trigger set on the window
 * @return the application descriptor built against the {@code config} field
 */
private StreamApplicationDescriptorImpl getTumblingWindowStreamGraph(AccumulationMode mode,
    Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    KVSerde<Integer, Integer> messageSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> integersDescriptor =
        kafka.getInputDescriptor("integers", messageSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersDescriptor);

    integers
        .window(Windows.tumblingWindow(duration, messageSerde).setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(new OutgoingMessageEnvelope(
                new SystemStream("outputSystem", "outputStream"), pane)));
  }, config);
}
 
Example #9
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Requests two distinct input streams and checks that both the input operators and the input
 * descriptors are registered under their respective stream ids.
 */
@Test
public void testMultipleGetInputStreams() {
  String firstId = "test-stream-1";
  String secondId = "test-stream-2";
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericInputDescriptor firstDescriptor = system.getInputDescriptor(firstId, mock(Serde.class));
  GenericInputDescriptor secondDescriptor = system.getInputDescriptor(secondId, mock(Serde.class));

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(appDesc -> {
    appDesc.getInputStream(firstDescriptor);
    appDesc.getInputStream(secondDescriptor);
  }, getConfig());

  InputOperatorSpec firstSpec = appDescriptor.getInputOperators().get(firstId);
  InputOperatorSpec secondSpec = appDescriptor.getInputOperators().get(secondId);

  // Input operators
  assertEquals(2, appDescriptor.getInputOperators().size());
  assertEquals(firstId, firstSpec.getStreamId());
  assertEquals(secondId, secondSpec.getStreamId());

  // Input descriptors
  assertEquals(2, appDescriptor.getInputDescriptors().size());
  assertEquals(firstDescriptor, appDescriptor.getInputDescriptors().get(firstId));
  assertEquals(secondDescriptor, appDescriptor.getInputDescriptors().get(secondId));
}
 
Example #10
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application that applies a keyed session window (keyed by {@code KV::getKey}) to the
 * "integers" input and sends every window output to the "outputSystem"/"outputStream" stream.
 *
 * @param mode the accumulation mode set on the window
 * @param duration the duration passed to the session window
 * @return the application descriptor built against the {@code config} field
 */
private StreamApplicationDescriptorImpl getKeyedSessionWindowStreamGraph(
    AccumulationMode mode, Duration duration) throws IOException {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    KVSerde<Integer, Integer> messageSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> integersDescriptor =
        kafka.getInputDescriptor("integers", messageSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(integersDescriptor);

    integers
        .window(Windows.keyedSessionWindow(KV::getKey, duration, new IntegerSerde(), messageSerde)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(new OutgoingMessageEnvelope(
                new SystemStream("outputSystem", "outputStream"), pane)));
  }, config);
}
 
Example #11
Source File: TestWindowOperator.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an application that maps the "integers" input through {@code KVMapFunction}, counts the
 * mapped messages per tumbling window (initial value 0, incremented by one per message) and
 * sends every window output to the "outputSystem"/"outputStream" stream.
 *
 * @param mode the accumulation mode set on the window
 * @param timeDuration the tumbling window duration
 * @param earlyTrigger the early trigger set on the window
 * @return the application descriptor built against the {@code config} field
 */
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode, Duration timeDuration,
      Trigger<IntegerEnvelope> earlyTrigger) throws IOException {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    KVSerde<Integer, Integer> messageSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor kafka = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> integersDescriptor =
        kafka.getInputDescriptor("integers", messageSerde);

    appDesc.getInputStream(integersDescriptor)
        .map(new KVMapFunction())
        .window(Windows.<IntegerEnvelope, Integer>tumblingWindow(timeDuration, () -> 0, (m, c) -> c + 1, new IntegerSerde())
            .setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((pane, collector, coordinator) ->
            collector.send(new OutgoingMessageEnvelope(
                new SystemStream("outputSystem", "outputStream"), pane)));
  }, config);
}
 
Example #12
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Requests an input stream whose descriptor carries a key-value serde and checks that the
 * resulting input operator exposes the key and value serdes returned by that key-value serde.
 */
@Test
public void testGetInputStreamWithKeyValueSerde() {
  String inputId = "test-stream-1";

  // Stub a KVSerde that hands out distinct key and value serde mocks.
  Serde keySerde = mock(Serde.class);
  Serde valueSerde = mock(Serde.class);
  KVSerde kvSerde = mock(KVSerde.class);
  doReturn(keySerde).when(kvSerde).getKeySerde();
  doReturn(valueSerde).when(kvSerde).getValueSerde();

  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericInputDescriptor inputDescriptor = system.getInputDescriptor(inputId, kvSerde);
  StreamApplicationDescriptorImpl appDescriptor =
      new StreamApplicationDescriptorImpl(appDesc -> appDesc.getInputStream(inputDescriptor), getConfig());

  InputOperatorSpec inputSpec = appDescriptor.getInputOperators().get(inputId);
  assertEquals(OpCode.INPUT, inputSpec.getOpCode());
  assertEquals(inputId, inputSpec.getStreamId());
  assertEquals(inputDescriptor, appDescriptor.getInputDescriptors().get(inputId));
  assertEquals(keySerde, inputSpec.getKeySerde());
  assertEquals(valueSerde, inputSpec.getValueSerde());
}
 
Example #13
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Requests an input stream whose descriptor carries a plain (non key-value) serde and checks
 * the resulting input operator's op code, stream id, key serde type and value serde.
 */
@Test
public void testGetInputStreamWithValueSerde() {
  String inputId = "test-stream-1";
  Serde valueSerde = mock(Serde.class);
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericInputDescriptor inputDescriptor = system.getInputDescriptor(inputId, valueSerde);
  StreamApplicationDescriptorImpl appDescriptor =
      new StreamApplicationDescriptorImpl(appDesc -> appDesc.getInputStream(inputDescriptor), getConfig());

  InputOperatorSpec inputSpec = appDescriptor.getInputOperators().get(inputId);
  assertEquals(OpCode.INPUT, inputSpec.getOpCode());
  assertEquals(inputId, inputSpec.getStreamId());
  assertEquals(inputDescriptor, appDescriptor.getInputDescriptors().get(inputId));
  assertTrue(inputSpec.getKeySerde() instanceof NoOpSerde);
  assertEquals(valueSerde, inputSpec.getValueSerde());
}
 
Example #14
Source File: TranslationContext.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * The dummy stream created will only be used in Beam tests.
 *
 * <p>Creates a single-partition in-memory stream named {@code id} on a system also named
 * {@code id}, and sends three messages to it in order: one "dummy" element, a watermark at
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE}, and an end-of-stream message.
 */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
  final GenericSystemDescriptor system =
      new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
  final GenericInputDescriptor<OpMessage<String>> input =
      system.getInputDescriptor(id, new NoOpSerde<>());
  input.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);

  // Create the backing in-memory stream before obtaining a producer for it.
  final Config config = new MapConfig(input.toConfig(), system.toConfig());
  final SystemFactory factory = new InMemorySystemFactory();
  factory.getAdmin(id, config).createStream(new StreamSpec(id, id, id, 1));

  final SystemProducer producer = factory.getProducer(id, config, null);
  final SystemStream target = new SystemStream(id, id);

  final WindowedValue<String> element =
      WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());
  producer.send(id, new OutgoingMessageEnvelope(target, 0, null, OpMessage.ofElement(element)));
  producer.send(
      id,
      new OutgoingMessageEnvelope(
          target, 0, null, new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis())));
  producer.send(id, new OutgoingMessageEnvelope(target, 0, null, new EndOfStreamMessage(null)));
  return input;
}
 
Example #15
Source File: TestOperatorImplGraph.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a graph in which a filtered and a mapped branch of the same input are merged and then
 * mapped, and verifies that exactly one merge operator exists, that it has a single registered
 * map operator, and that the map function after the merge is initialized only once.
 */
@Test
public void testMergeChain() {
  String inputStreamId = "input";
  String inputSystem = "input-system";
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor = sd.getInputDescriptor(inputStreamId, mock(Serde.class));
    MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
    MessageStream<Object> stream1 = inputStream.filter(mock(FilterFunction.class));
    MessageStream<Object> stream2 = inputStream.map(mock(MapFunction.class));
    stream1.merge(Collections.singleton(stream2))
        .map(new TestMapFunction<Object, Object>("test-map-1", (Function & Serializable) m -> m));
  }, getConfig());

  // The task name is stubbed so the map function instance can be looked up by it below.
  TaskName mockTaskName = mock(TaskName.class);
  TaskModel taskModel = mock(TaskModel.class);
  when(taskModel.getTaskName()).thenReturn(mockTaskName);
  when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);

  OperatorImplGraph opImplGraph =
      new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

  // Collect every operator reachable from the input operators into one set.
  Set<OperatorImpl> opSet = opImplGraph.getAllInputOperators().stream().collect(HashSet::new,
    (s, op) -> addOperatorRecursively(s, op), HashSet::addAll);
  Object[] mergeOps = opSet.stream().filter(op -> op.getOperatorSpec().getOpCode() == OpCode.MERGE).toArray();
  assertEquals(1, mergeOps.length);
  assertEquals(1, ((OperatorImpl) mergeOps[0]).registeredOperators.size());
  OperatorImpl mapOp = (OperatorImpl) ((OperatorImpl) mergeOps[0]).registeredOperators.iterator().next();
  // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(OpCode.MAP, mapOp.getOperatorSpec().getOpCode());

  // verify that the DAG after merge is only traversed & initialized once
  assertEquals(1, TestMapFunction.getInstanceByTaskName(mockTaskName, "test-map-1").numInitCalled);
}
 
Example #16
Source File: TestOperatorImplGraph.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches both a filter and a map directly to the same input stream and verifies that the
 * input operator ends up with exactly those two registered operators.
 */
@Test
public void testBroadcastChain() {
  String streamId = "input";
  String systemName = "input-system";
  String physicalName = "input-stream";

  HashMap<String, String> jobProps = new HashMap<>();
  jobProps.put(JobConfig.JOB_NAME, "test-job");
  jobProps.put(JobConfig.JOB_ID, "1");
  StreamTestUtils.addStreamConfigs(jobProps, streamId, systemName, physicalName);
  Config jobConfig = new MapConfig(jobProps);
  when(this.context.getJobContext().getConfig()).thenReturn(jobConfig);

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor system = new GenericSystemDescriptor(systemName, "mockFactoryClass");
    GenericInputDescriptor descriptor = system.getInputDescriptor(streamId, mock(Serde.class));
    MessageStream<Object> source = appDesc.getInputStream(descriptor);
    // Two independent operators hang off the same source stream.
    source.filter(mock(FilterFunction.class));
    source.map(mock(MapFunction.class));
  }, jobConfig);

  OperatorImplGraph implGraph =
      new OperatorImplGraph(appDescriptor.getOperatorSpecGraph(), this.context, mock(Clock.class));

  InputOperatorImpl sourceImpl = implGraph.getInputOperator(new SystemStream(systemName, physicalName));
  assertEquals(2, sourceImpl.registeredOperators.size());
  assertTrue(sourceImpl.registeredOperators.stream()
      .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.FILTER));
  assertTrue(sourceImpl.registeredOperators.stream()
      .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.MAP));
}
 
Example #17
Source File: AzureBlobApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the application: reads {@code PageView} messages from a Kafka input stream,
 * converts each one to a {@code PageViewAvroRecord} and sends it to an output stream on a
 * system backed by {@code AzureBlobSystemFactory}.
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  // Kafka system and its page-view input
  KafkaSystemDescriptor kafka =
      new KafkaSystemDescriptor("kafka")
          .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
          .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
          .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);
  KafkaInputDescriptor<PageView> pageViewSource =
      kafka.getInputDescriptor(INPUT_PAGEVIEW_STREAM_ID, new JsonSerdeV2<>(PageView.class));

  // Azure Blob Storage system and its Avro record output
  GenericSystemDescriptor azureBlob =
      new GenericSystemDescriptor(OUTPUT_SYSTEM, "org.apache.samza.system.azureblob.AzureBlobSystemFactory");
  GenericOutputDescriptor<PageViewAvroRecord> avroRecordSink =
      azureBlob.getOutputDescriptor(OUTPUT_STREAM, new NoOpSerde<>());

  // Kafka is the default system for the job
  appDescriptor.withDefaultSystem(kafka);

  // Materialize the streams from their descriptors
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(pageViewSource);
  OutputStream<PageViewAvroRecord> avroRecords = appDescriptor.getOutputStream(avroRecordSink);

  // High-level API flow: log, convert, send
  pageViews
      .map(pageView -> {
        LOG.info("Sending: Received PageViewEvent with pageId: " + pageView.pageId);
        return PageViewAvroRecord.buildPageViewRecord(pageView);
      })
      .sendTo(avroRecords);
}
 
Example #18
Source File: ExecutionPlannerTestBase.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the serde, the system and stream descriptors, the job config and the application
 * descriptor fields used by the tests.
 */
@Before
public void setUp() {
  final String factoryClassName = "mockSystemFactoryClassName";
  final String partitionByStream = "jobName-jobId-partition_by-p1";
  final String broadcastStream = "jobName-jobId-broadcast-b1";

  defaultSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>());

  // Systems
  inputSystemDescriptor = new GenericSystemDescriptor("input-system", factoryClassName);
  outputSystemDescriptor = new GenericSystemDescriptor("output-system", factoryClassName);
  intermediateSystemDescriptor = new GenericSystemDescriptor("intermediate-system", factoryClassName);

  // Streams
  input1Descriptor = inputSystemDescriptor.getInputDescriptor("input1", defaultSerde);
  input2Descriptor = inputSystemDescriptor.getInputDescriptor("input2", defaultSerde);
  outputDescriptor = outputSystemDescriptor.getOutputDescriptor("output", defaultSerde);
  intermediateInputDescriptor = intermediateSystemDescriptor.getInputDescriptor(partitionByStream, defaultSerde)
      .withPhysicalName(partitionByStream);
  intermediateOutputDescriptor = intermediateSystemDescriptor.getOutputDescriptor(partitionByStream, defaultSerde)
      .withPhysicalName(partitionByStream);
  broadcastInputDesriptor = intermediateSystemDescriptor.getInputDescriptor(broadcastStream, defaultSerde)
      .withPhysicalName(broadcastStream);

  // Job config: job identity, then stream configs, then system configs, then the default system.
  Map<String, String> jobProps = new HashMap<>();
  jobProps.put(JobConfig.JOB_NAME, "jobName");
  jobProps.put(JobConfig.JOB_ID, "jobId");
  jobProps.putAll(input1Descriptor.toConfig());
  jobProps.putAll(input2Descriptor.toConfig());
  jobProps.putAll(outputDescriptor.toConfig());
  jobProps.putAll(inputSystemDescriptor.toConfig());
  jobProps.putAll(outputSystemDescriptor.toConfig());
  jobProps.putAll(intermediateSystemDescriptor.toConfig());
  jobProps.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystemDescriptor.getSystemName());
  mockConfig = spy(new MapConfig(jobProps));

  mockStreamAppDesc = new StreamApplicationDescriptorImpl(getRepartitionJoinStreamApplication(), mockConfig);
}
 
Example #19
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalStateException} when the default system is set after an input
 * stream has already been requested.
 */
@Test(expected = IllegalStateException.class)
public void testSetDefaultSystemDescriptorAfterGettingInputStream() {
  String inputId = "test-stream-1";
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericInputDescriptor inputDescriptor = system.getInputDescriptor(inputId, mock(Serde.class));

  new StreamApplicationDescriptorImpl(appDesc -> {
    appDesc.getInputStream(inputDescriptor);
    // should throw exception
    appDesc.withDefaultSystem(system);
  }, getConfig());
}
 
Example #20
Source File: SystemConsumerWithSamzaBench.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the benchmark: starts an application that feeds the configured input stream into a
 * {@code MessageConsumer}, waits until {@code totalEvents} events have been consumed, kills the
 * runner, prints the measured event rate and exits the JVM.
 *
 * @throws IOException declared by the signature; presumably raised by {@code super.start()}
 * @throws InterruptedException if the thread is interrupted while waiting for events
 */
public void start() throws IOException, InterruptedException {
  super.start();
  MessageConsumer consumeFn = new MessageConsumer();
  StreamApplication app = appDesc -> {
    String systemFactoryName = new SystemConfig(config).getSystemFactory(systemName).get();
    GenericSystemDescriptor sd = new GenericSystemDescriptor(systemName, systemFactoryName);
    GenericInputDescriptor<Object> isd = sd.getInputDescriptor(streamId, new NoOpSerde<>());
    MessageStream<Object> stream = appDesc.getInputStream(isd);
    stream.map(consumeFn);
  };
  ApplicationRunner runner = ApplicationRunners.getApplicationRunner(app, new MapConfig());

  runner.run();

  // Poll until the consumer has seen the requested number of events.
  while (consumeFn.getEventsConsumed() < totalEvents) {
    Thread.sleep(10);
  }

  // Capture the end time before killing the runner so shutdown time is not measured.
  Instant endTime = Instant.now();

  runner.kill();

  // Use the same end time for every figure, and clamp the elapsed time to at least 1 ms so a
  // very short run cannot cause a division by zero. Multiplying by 1000L widens the event
  // count to long before scaling, avoiding overflow for large counts.
  long elapsedMillis = Math.max(1L, Duration.between(consumeFn.startTime, endTime).toMillis());
  long eventsPerSecond = 1000L * consumeFn.getEventsConsumed() / elapsedMillis;

  System.out.println("\n*******************");
  System.out.println(String.format("Started at %s Ending at %s ", consumeFn.startTime, endTime));
  System.out.println(String.format("Event Rate is %s Messages/Sec ", eventsPerSecond));

  System.out.println("Event Rate is " + eventsPerSecond);
  System.out.println("*******************\n");

  System.exit(0);
}
 
Example #21
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when an input descriptor is created with a
 * {@code null} serde and then used to request an input stream.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetInputStreamWithNullSerde() {
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericInputDescriptor inputDescriptor = system.getInputDescriptor("mockStreamId", null);
  new StreamApplicationDescriptorImpl(appDesc -> appDesc.getInputStream(inputDescriptor), getConfig());
}
 
Example #22
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Requests an input stream through a raw-typed descriptor and checks the resulting input
 * operator's op code and stream id, and the registered descriptor.
 */
@Test
public void testGetInputStreamWithRelaxedTypes() {
  String inputId = "test-stream-1";
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericInputDescriptor inputDescriptor = system.getInputDescriptor(inputId, mock(Serde.class));
  StreamApplicationDescriptorImpl appDescriptor =
      new StreamApplicationDescriptorImpl(appDesc -> appDesc.getInputStream(inputDescriptor), getConfig());

  InputOperatorSpec inputSpec = appDescriptor.getInputOperators().get(inputId);
  assertEquals(OpCode.INPUT, inputSpec.getOpCode());
  assertEquals(inputId, inputSpec.getStreamId());
  assertEquals(inputDescriptor, appDescriptor.getInputDescriptors().get(inputId));
}
 
Example #23
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalStateException} when two input descriptors with the same stream id
 * are both used to request an input stream.
 */
@Test(expected = IllegalStateException.class)
public void testGetSameInputStreamTwice() {
  String inputId = "test-stream-1";
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericInputDescriptor first = system.getInputDescriptor(inputId, mock(Serde.class));
  GenericInputDescriptor duplicate = system.getInputDescriptor(inputId, mock(Serde.class));
  new StreamApplicationDescriptorImpl(appDesc -> {
    appDesc.getInputStream(first);
    // should throw exception
    appDesc.getInputStream(duplicate);
  }, getConfig());
}
 
Example #24
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalArgumentException} when an output descriptor is created with a
 * {@code null} serde and then used to request an output stream.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetOutputStreamWithNullSerde() {
  String outputId = "test-stream-1";
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericOutputDescriptor outputDescriptor = system.getOutputDescriptor(outputId, null);
  new StreamApplicationDescriptorImpl(appDesc -> appDesc.getOutputStream(outputDescriptor), getConfig());
}
 
Example #25
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalStateException} when the default system is set after an output
 * stream has already been requested.
 */
@Test(expected = IllegalStateException.class)
public void testSetDefaultSystemDescriptorAfterGettingOutputStream() {
  String outputId = "test-stream-1";
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericOutputDescriptor outputDescriptor = system.getOutputDescriptor(outputId, mock(Serde.class));
  new StreamApplicationDescriptorImpl(appDesc -> {
    appDesc.getOutputStream(outputDescriptor);
    // should throw exception
    appDesc.withDefaultSystem(system);
  }, getConfig());
}
 
Example #26
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an {@link IllegalStateException} when two output descriptors with the same stream id
 * are both used to request an output stream.
 */
@Test(expected = IllegalStateException.class)
public void testGetSameOutputStreamTwice() {
  String outputId = "test-stream-1";
  GenericSystemDescriptor system = new GenericSystemDescriptor("mockSystem", "mockSystemFactoryClass");
  GenericOutputDescriptor first = system.getOutputDescriptor(outputId, mock(Serde.class));
  GenericOutputDescriptor duplicate = system.getOutputDescriptor(outputId, mock(Serde.class));
  new StreamApplicationDescriptorImpl(appDesc -> {
    appDesc.getOutputStream(first);
    // should throw exception
    appDesc.getOutputStream(duplicate);
  }, getConfig());
}
 
Example #27
Source File: TestStreamApplicationDescriptorImpl.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Sets a default system on an otherwise empty application, requests an intermediate stream and
 * checks that it is wired to the input operator and output stream registered under its id.
 */
@Test
public void testGetIntermediateStreamWithDefaultSystemDescriptor() {
  String intermediateId = "streamId";
  Config jobConfig = getConfig();

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(appDesc -> { }, jobConfig);
  GenericSystemDescriptor defaultSystem = new GenericSystemDescriptor("mock-system", "mock-system-factory");
  appDescriptor.withDefaultSystem(defaultSystem);

  IntermediateMessageStreamImpl<TestMessageEnvelope> intermediate =
      appDescriptor.getIntermediateStream(intermediateId, mock(Serde.class), false);

  assertEquals(appDescriptor.getInputOperators().get(intermediateId), intermediate.getOperatorSpec());
  assertEquals(appDescriptor.getOutputStreams().get(intermediateId), intermediate.getOutputStream());
  assertEquals(intermediateId, intermediate.getStreamId());
}
 
Example #28
Source File: TestOperatorImplGraph.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an {@code input -> partitionBy -> sendTo(output)} application and verifies the
 * shape of the resulting {@link OperatorImplGraph}: the input operator feeds a terminal
 * partitionBy operator, and the repartitioned (intermediate) input feeds a terminal
 * sendTo operator.
 */
@Test
public void testPartitionByChain() {
  // Stream ids, system names and physical names for the external input and output.
  String inputStreamId = "input";
  String inputSystem = "input-system";
  String inputPhysicalName = "input-stream";
  String outputStreamId = "output";
  String outputSystem = "output-system";
  String outputPhysicalName = "output-stream";
  // The repartitioned stream is looked up below on the job's default system under this id.
  String intermediateStreamId = "jobName-jobId-partition_by-p1";
  String intermediateSystem = "intermediate-system";

  HashMap<String, String> jobConfigs = new HashMap<>();
  jobConfigs.put(JobConfig.JOB_NAME, "jobName");
  jobConfigs.put(JobConfig.JOB_ID, "jobId");
  jobConfigs.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystem);
  StreamTestUtils.addStreamConfigs(jobConfigs, inputStreamId, inputSystem, inputPhysicalName);
  StreamTestUtils.addStreamConfigs(jobConfigs, outputStreamId, outputSystem, outputPhysicalName);
  Config jobConfig = new MapConfig(jobConfigs);
  when(this.context.getJobContext().getConfig()).thenReturn(jobConfig);

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(app -> {
    GenericSystemDescriptor inputSystemDescriptor = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericSystemDescriptor outputSystemDescriptor = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor =
        inputSystemDescriptor.getInputDescriptor(inputStreamId, mock(Serde.class));
    GenericOutputDescriptor outputDescriptor = outputSystemDescriptor.getOutputDescriptor(outputStreamId,
        KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)));
    MessageStream<Object> source = app.getInputStream(inputDescriptor);
    OutputStream<KV<Integer, String>> sink = app.getOutputStream(outputDescriptor);

    source
        .partitionBy(Object::hashCode, Object::toString,
            KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)), "p1")
        .sendTo(sink);
  }, jobConfig);

  // Stub a job model with one container ("0") holding one task ("task 0") that owns
  // no system stream partitions; the stubs are wired from the innermost model outwards.
  TaskModel taskModel = mock(TaskModel.class);
  when(taskModel.getSystemStreamPartitions()).thenReturn(Collections.emptySet());
  ContainerModel containerModel = mock(ContainerModel.class);
  when(containerModel.getTasks()).thenReturn(Collections.singletonMap(new TaskName("task 0"), taskModel));
  JobModel jobModel = mock(JobModel.class);
  when(jobModel.getContainers()).thenReturn(Collections.singletonMap("0", containerModel));
  when(((TaskContextImpl) this.context.getTaskContext()).getJobModel()).thenReturn(jobModel);

  OperatorImplGraph operatorGraph =
      new OperatorImplGraph(appDescriptor.getOperatorSpecGraph(), this.context, mock(Clock.class));

  // First leg: external input -> partitionBy.
  InputOperatorImpl inputOperator =
      operatorGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));
  assertEquals(1, inputOperator.registeredOperators.size());

  OperatorImpl partitionByOperator = (PartitionByOperatorImpl) inputOperator.registeredOperators.iterator().next();
  assertEquals(0, partitionByOperator.registeredOperators.size()); // is terminal but paired with an input operator
  assertEquals(OpCode.PARTITION_BY, partitionByOperator.getOperatorSpec().getOpCode());

  // Second leg: intermediate (repartitioned) input -> sendTo.
  InputOperatorImpl repartitionedInputOperator =
      operatorGraph.getInputOperator(new SystemStream(intermediateSystem, intermediateStreamId));
  assertEquals(1, repartitionedInputOperator.registeredOperators.size());

  OperatorImpl sendToOperator =
      (OutputOperatorImpl) repartitionedInputOperator.registeredOperators.iterator().next();
  assertEquals(0, sendToOperator.registeredOperators.size());
  assertEquals(OpCode.SEND_TO, sendToOperator.getOperatorSpec().getOpCode());
}
 
Example #29
Source File: TestOperatorImplGraph.java    From samza with Apache License 2.0 4 votes vote down vote up
@Test
public void testLinearChain() {
  String inputStreamId = "input";
  String inputSystem = "input-system";
  String inputPhysicalName = "input-stream";
  String outputStreamId = "output";
  String outputSystem = "output-system";
  String outputPhysicalName = "output-stream";
  String intermediateSystem = "intermediate-system";

  HashMap<String, String> configs = new HashMap<>();
  configs.put(JobConfig.JOB_NAME, "jobName");
  configs.put(JobConfig.JOB_ID, "jobId");
  configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystem);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId, inputSystem, inputPhysicalName);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId, outputSystem, outputPhysicalName);
  Config config = new MapConfig(configs);
  when(this.context.getJobContext().getConfig()).thenReturn(config);

  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor = sd.getInputDescriptor(inputStreamId, mock(Serde.class));
    GenericOutputDescriptor outputDescriptor = sd.getOutputDescriptor(outputStreamId, mock(Serde.class));
    MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
    OutputStream<Object> outputStream = appDesc.getOutputStream(outputDescriptor);

    inputStream
        .filter(mock(FilterFunction.class))
        .map(mock(MapFunction.class))
        .sendTo(outputStream);
  }, config);

  OperatorImplGraph opImplGraph =
      new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

  InputOperatorImpl inputOpImpl = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));
  assertEquals(1, inputOpImpl.registeredOperators.size());

  OperatorImpl filterOpImpl = (FlatmapOperatorImpl) inputOpImpl.registeredOperators.iterator().next();
  assertEquals(1, filterOpImpl.registeredOperators.size());
  assertEquals(OpCode.FILTER, filterOpImpl.getOperatorSpec().getOpCode());

  OperatorImpl mapOpImpl = (FlatmapOperatorImpl) filterOpImpl.registeredOperators.iterator().next();
  assertEquals(1, mapOpImpl.registeredOperators.size());
  assertEquals(OpCode.MAP, mapOpImpl.getOperatorSpec().getOpCode());

  OperatorImpl sendToOpImpl = (OutputOperatorImpl) mapOpImpl.registeredOperators.iterator().next();
  assertEquals(0, sendToOpImpl.registeredOperators.size());
  assertEquals(OpCode.SEND_TO, sendToOpImpl.getOperatorSpec().getOpCode());
}
 
Example #30
Source File: TestJobGraphJsonGenerator.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Plans an application with three inputs, two partitionBy repartitions, two joins, one
 * sink and two outputs, then deserializes the plan's JSON and checks the number of
 * input streams, operators, source streams, sink streams and intermediate streams.
 *
 * @throws Exception if planning or JSON deserialization fails
 */
@Test
public void testRepartitionedJoinStreamApplication() throws Exception {

  /**
   * the graph looks like the following.
   * number in parentheses () indicates number of stream partitions.
   * number in parentheses in quotes ("") indicates expected partition count.
   * number in square brackets [] indicates operator ID.
   *
   * input3 (32) -> filter [7] -> partitionBy [8] ("64") -> map [10] -> join [14] -> sendTo(output2) [15] (16)
   *                                                                   |
   *              input2 (16) -> partitionBy [3] ("64") -> filter [5] -| -> sink [13]
   *                                                                   |
   *                                         input1 (64) -> map [1] -> join [11] -> sendTo(output1) [12] (8)
   *
   */

  // Job-level config: input1/output1 live on system1, input2/input3/output2 on system2,
  // and "test-system" is the job's default system.
  Map<String, String> configMap = new HashMap<>();
  configMap.put(JobConfig.JOB_NAME, "test-app");
  configMap.put(JobConfig.JOB_DEFAULT_SYSTEM, "test-system");
  StreamTestUtils.addStreamConfigs(configMap, "input1", "system1", "input1");
  StreamTestUtils.addStreamConfigs(configMap, "input2", "system2", "input2");
  StreamTestUtils.addStreamConfigs(configMap, "input3", "system2", "input3");
  StreamTestUtils.addStreamConfigs(configMap, "output1", "system1", "output1");
  StreamTestUtils.addStreamConfigs(configMap, "output2", "system2", "output2");
  Config config = new MapConfig(configMap);

  // set up external partition count
  Map<String, Integer> system1Map = new HashMap<>();
  system1Map.put("input1", 64);
  system1Map.put("output1", 8);
  Map<String, Integer> system2Map = new HashMap<>();
  system2Map.put("input2", 16);
  system2Map.put("input3", 32);
  system2Map.put("output2", 16);

  // NOTE(review): createSystemAdmin is defined outside this snippet; presumably it returns
  // a SystemAdmin that reports the given per-stream partition counts — confirm.
  SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
  SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
  SystemAdmins systemAdmins = mock(SystemAdmins.class);
  when(systemAdmins.getSystemAdmin("system1")).thenReturn(systemAdmin1);
  when(systemAdmins.getSystemAdmin("system2")).thenReturn(systemAdmin2);
  StreamManager streamManager = new StreamManager(systemAdmins);

  // Build the application; the statements below follow the diagram above, so their
  // order should not be changed casually.
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    KVSerde<Object, Object> kvSerde = new KVSerde<>(new NoOpSerde(), new NoOpSerde());
    String mockSystemFactoryClass = "factory.class.name";
    GenericSystemDescriptor system1 = new GenericSystemDescriptor("system1", mockSystemFactoryClass);
    GenericSystemDescriptor system2 = new GenericSystemDescriptor("system2", mockSystemFactoryClass);
    GenericInputDescriptor<KV<Object, Object>> input1Descriptor = system1.getInputDescriptor("input1", kvSerde);
    GenericInputDescriptor<KV<Object, Object>> input2Descriptor = system2.getInputDescriptor("input2", kvSerde);
    GenericInputDescriptor<KV<Object, Object>> input3Descriptor = system2.getInputDescriptor("input3", kvSerde);
    GenericOutputDescriptor<KV<Object, Object>>  output1Descriptor = system1.getOutputDescriptor("output1", kvSerde);
    GenericOutputDescriptor<KV<Object, Object>> output2Descriptor = system2.getOutputDescriptor("output2", kvSerde);

    // input1 -> map
    MessageStream<KV<Object, Object>> messageStream1 =
        appDesc.getInputStream(input1Descriptor)
            .map(m -> m);
    // input2 -> partitionBy("p1") -> filter
    MessageStream<KV<Object, Object>> messageStream2 =
        appDesc.getInputStream(input2Descriptor)
            .partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1")
            .filter(m -> true);
    // input3 -> filter -> partitionBy("p2") -> map
    MessageStream<KV<Object, Object>> messageStream3 =
        appDesc.getInputStream(input3Descriptor)
            .filter(m -> true)
            .partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p2")
            .map(m -> m);
    OutputStream<KV<Object, Object>> outputStream1 = appDesc.getOutputStream(output1Descriptor);
    OutputStream<KV<Object, Object>> outputStream2 = appDesc.getOutputStream(output2Descriptor);

    // messageStream2 is consumed three times: by join "j1", by the sink, and by join "j2".
    messageStream1
        .join(messageStream2,
            (JoinFunction<Object, KV<Object, Object>, KV<Object, Object>, KV<Object, Object>>) mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1")
        .sendTo(outputStream1);
    messageStream2.sink((message, collector, coordinator) -> { });
    messageStream3
        .join(messageStream2,
            (JoinFunction<Object, KV<Object, Object>, KV<Object, Object>, KV<Object, Object>>) mock(JoinFunction.class),
            mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2")
        .sendTo(outputStream2);
  }, config);

  // Plan the application and print the JSON form of the plan to stdout.
  ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
  ExecutionPlan plan = planner.plan(graphSpec);
  String json = plan.getPlanAsJson();
  System.out.println(json);

  // deserialize
  ObjectMapper mapper = new ObjectMapper();
  JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
  // presumably the 3 declared inputs plus the 2 partitionBy intermediate streams — confirm
  assertEquals(5, nodes.jobs.get(0).operatorGraph.inputStreams.size());
  // matches the eleven bracketed operator IDs in the diagram above
  assertEquals(11, nodes.jobs.get(0).operatorGraph.operators.size());
  // presumably input1..input3, output1..output2, and the "p1"/"p2" repartition streams — confirm
  assertEquals(3, nodes.sourceStreams.size());
  assertEquals(2, nodes.sinkStreams.size());
  assertEquals(2, nodes.intermediateStreams.size());
}