org.apache.samza.task.StreamTaskFactory Java Examples

The following examples show how to use org.apache.samza.task.StreamTaskFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TransactionalStateIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a low-level task application that consumes string key/value pairs from one Kafka input
 * stream and registers a single RocksDB table backed by a changelog stream.
 *
 * @param appDescriptor descriptor that receives the input stream, the task factory and the table
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  final KVSerde<String, String> stringPairSerde = KVSerde.of(new StringSerde(), new StringSerde());

  // Input: INPUT_TOPIC read through the INPUT_SYSTEM Kafka system.
  final KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(INPUT_SYSTEM);
  final KafkaInputDescriptor<KV<String, String>> inputStream =
      kafkaSystem.getInputDescriptor(INPUT_TOPIC, stringPairSerde);

  // State: one store whose changelog is configured with a replication factor of 1.
  final RocksDbTableDescriptor<String, String> storeTable =
      new RocksDbTableDescriptor<>(STORE_NAME, stringPairSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);

  appDescriptor.withInputStream(inputStream);
  appDescriptor.withTaskFactory((StreamTaskFactory) MyTask::new);
  appDescriptor.withTable(storeTable);
}
 
Example #2
Source File: TransactionalStateMultiStoreIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Describes a low-level task application that consumes string key/value pairs from one Kafka input
 * stream and registers two RocksDB tables, each with its own changelog stream.
 *
 * @param appDescriptor descriptor that receives the input stream, the task factory and both tables
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  final KVSerde<String, String> stringPairSerde = KVSerde.of(new StringSerde(), new StringSerde());

  // Input: INPUT_TOPIC read through the INPUT_SYSTEM Kafka system.
  final KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(INPUT_SYSTEM);
  final KafkaInputDescriptor<KV<String, String>> inputStream =
      kafkaSystem.getInputDescriptor(INPUT_TOPIC, stringPairSerde);

  // State: two stores, both with a changelog replication factor of 1.
  final RocksDbTableDescriptor<String, String> firstStore =
      new RocksDbTableDescriptor<>(STORE_1_NAME, stringPairSerde)
          .withChangelogStream(changelogTopic)
          .withChangelogReplicationFactor(1);
  final RocksDbTableDescriptor<String, String> secondStore =
      new RocksDbTableDescriptor<>(STORE_2_NAME, stringPairSerde)
          .withChangelogStream(STORE_2_CHANGELOG)
          .withChangelogReplicationFactor(1);

  appDescriptor.withInputStream(inputStream);
  appDescriptor.withTaskFactory((StreamTaskFactory) MyTask::new);
  appDescriptor.withTable(firstStore);
  appDescriptor.withTable(secondStore);
}
 
Example #3
Source File: TestStreamProcessor.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Expects a {@link SamzaException} when the processor is run with neither a task factory nor a
 * "task.class" configuration entry, i.e. creating a SamzaContainer should fail.
 */
@Test(expected = SamzaException.class)
public void testStreamProcessorWithNoTask() {
  final int numMessages = 20;
  final String system = "test-system";
  final String source = "numbers4";
  final String sink = "output4";

  // Start from the regular test configs, then drop the task class entry.
  final Map<String, String> rawConfig = createConfigs(PROCESSOR_ID, system, source, sink, numMessages);
  rawConfig.remove("task.class");

  // No task factory either: an explicit, typed null is handed to the stubs.
  final TestStubs stubs =
      new TestStubs(new MapConfig(rawConfig), (StreamTaskFactory) null, bootstrapServers());

  run(stubs.processor, stubs.shutdownLatch);
}
 
Example #4
Source File: WikipediaStatsTaskApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the task application: reads the "wikipedia-edits" Kafka topic, writes to the
 * "wikipedia-stats" Kafka topic, and creates a {@code WikipediaStatsStreamTask} per task.
 *
 * @param taskApplicationDescriptor descriptor that receives the default system, streams and task factory
 */
@Override
public void describe(TaskApplicationDescriptor taskApplicationDescriptor) {

  // Kafka system used for both the input and the output of this application
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Source topic: wikipedia-edits (JSON)
  KafkaInputDescriptor editsInput = kafka.getInputDescriptor("wikipedia-edits", new JsonSerde<>());

  // Kafka is the default system, so it also backs internal resources such as the
  // checkpoint topic and the coordinator stream.
  taskApplicationDescriptor.withDefaultSystem(kafka);

  // Register the input
  taskApplicationDescriptor.withInputStream(editsInput);

  // Sink topic: wikipedia-stats (JSON)
  KafkaOutputDescriptor statsOutput = kafka.getOutputDescriptor("wikipedia-stats", new JsonSerde<>());
  taskApplicationDescriptor.withOutputStream(statsOutput);

  // One new WikipediaStatsStreamTask per created task instance
  taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) WikipediaStatsStreamTask::new);
}
 
Example #5
Source File: WikipediaParserTaskApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the task application: reads the "wikipedia-raw" Kafka topic, writes to the
 * "wikipedia-edits" Kafka topic, and creates a {@code WikipediaParserStreamTask} per task.
 *
 * @param taskApplicationDescriptor descriptor that receives the default system, streams and task factory
 */
@Override
public void describe(TaskApplicationDescriptor taskApplicationDescriptor) {

  // Kafka serves as both the input and the output system
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Source topic: wikipedia-raw (JSON)
  KafkaInputDescriptor rawInput = kafka.getInputDescriptor("wikipedia-raw", new JsonSerde<>());

  // Sink topic: wikipedia-edits (JSON)
  KafkaOutputDescriptor editsOutput = kafka.getOutputDescriptor("wikipedia-edits", new JsonSerde<>());

  // Kafka is the default system, so it also backs internal resources such as the
  // checkpoint topic and the coordinator stream.
  taskApplicationDescriptor.withDefaultSystem(kafka);

  // Register the input and the output
  taskApplicationDescriptor.withInputStream(rawInput);
  taskApplicationDescriptor.withOutputStream(editsOutput);

  // One new WikipediaParserStreamTask per created task instance
  taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) WikipediaParserStreamTask::new);
}
 
Example #6
Source File: WikipediaFeedTaskApplication.java    From samza-hello-samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the task application: consumes three Wikipedia IRC channels, writes to the
 * "wikipedia-raw" Kafka topic, and creates a {@code WikipediaFeedStreamTask} per task.
 *
 * @param taskApplicationDescriptor descriptor that receives the default system, streams and task factory
 */
@Override
public void describe(TaskApplicationDescriptor taskApplicationDescriptor) {

  // Wikipedia feed system (host and port of the IRC server)
  WikipediaSystemDescriptor wikipedia = new WikipediaSystemDescriptor("irc.wikimedia.org", 6667);

  // One input per channel: wikipedia, wiktionary and wikinews
  WikipediaInputDescriptor wikipediaInput =
      wikipedia.getInputDescriptor("en-wikipedia").withChannel("#en.wikipedia");
  WikipediaInputDescriptor wiktionaryInput =
      wikipedia.getInputDescriptor("en-wiktionary").withChannel("#en.wiktionary");
  WikipediaInputDescriptor wikiNewsInput =
      wikipedia.getInputDescriptor("en-wikinews").withChannel("#en.wikinews");

  // Kafka is the output system
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("kafka")
      .withConsumerZkConnect(KAFKA_CONSUMER_ZK_CONNECT)
      .withProducerBootstrapServers(KAFKA_PRODUCER_BOOTSTRAP_SERVERS)
      .withDefaultStreamConfigs(KAFKA_DEFAULT_STREAM_CONFIGS);

  // Sink topic: wikipedia-raw (JSON)
  KafkaOutputDescriptor rawOutput = kafka.getOutputDescriptor("wikipedia-raw", new JsonSerde<>());

  // Kafka is the default system, so it also backs internal resources such as the
  // checkpoint topic and the coordinator stream.
  taskApplicationDescriptor.withDefaultSystem(kafka);

  // Register the three inputs
  taskApplicationDescriptor.withInputStream(wikipediaInput);
  taskApplicationDescriptor.withInputStream(wiktionaryInput);
  taskApplicationDescriptor.withInputStream(wikiNewsInput);

  // Register the output
  taskApplicationDescriptor.withOutputStream(rawOutput);

  // One new WikipediaFeedStreamTask per created task instance
  taskApplicationDescriptor.withTaskFactory((StreamTaskFactory) WikipediaFeedStreamTask::new);
}
 
Example #7
Source File: FaultInjectionTest.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a task application whose input topic name is read from the application config and
 * whose tasks are {@code FaultInjectionTask} instances sharing {@code containerShutdownLatch}.
 *
 * @param appDescriptor descriptor supplying the config and receiving the input stream and task factory
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // The topic to consume is configured under INPUT_TOPIC_NAME_PROP.
  final String topicName = appDescriptor.getConfig().get(INPUT_TOPIC_NAME_PROP);

  final JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
  final KafkaSystemDescriptor kafkaSystem = new KafkaSystemDescriptor(SYSTEM);
  final KafkaInputDescriptor<PageView> pageViews = kafkaSystem.getInputDescriptor(topicName, pageViewSerde);

  appDescriptor.withInputStream(pageViews);
  appDescriptor.withTaskFactory((StreamTaskFactory) () -> new FaultInjectionTask(containerShutdownLatch));
}
 
Example #8
Source File: StreamTaskIntegrationTest.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a task application with "Profile" and "PageView" inputs, an "EnrichedPageView" output,
 * an in-memory table named "profile-view-store", and {@code StatefulStreamTask} tasks.
 *
 * @param appDescriptor descriptor that receives the streams, the table and the task factory
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  final KafkaSystemDescriptor testSystem = new KafkaSystemDescriptor("test");

  // Inputs use JSON serdes; the output uses a no-op serde.
  final KafkaInputDescriptor<Profile> profiles = testSystem.getInputDescriptor("Profile", new JsonSerdeV2<>());
  final KafkaInputDescriptor<PageView> pageViews = testSystem.getInputDescriptor("PageView", new JsonSerdeV2<>());
  final KafkaOutputDescriptor<EnrichedPageView> enrichedPageViews =
      testSystem.getOutputDescriptor("EnrichedPageView", new NoOpSerde<>());

  appDescriptor.withInputStream(profiles);
  appDescriptor.withInputStream(pageViews);
  appDescriptor.withOutputStream(enrichedPageViews);

  // Table keyed by integer with JSON-serialized profile values.
  appDescriptor.withTable(new InMemoryTableDescriptor("profile-view-store",
      KVSerde.of(new IntegerSerde(), new TestTableData.ProfileJsonSerde())));

  appDescriptor.withTaskFactory((StreamTaskFactory) StatefulStreamTask::new);
}
 
Example #9
Source File: TestLocalTableWithConfigRewriterEndToEnd.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a task application with a single "PageView" input on the "test" system and
 * {@code MyStreamTask} tasks.
 *
 * @param appDescriptor descriptor that receives the input stream and the task factory
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
  final GenericInputDescriptor<TestTableData.PageView> pageViews =
      testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

  appDescriptor.withInputStream(pageViews);
  appDescriptor.withTaskFactory((StreamTaskFactory) MyStreamTask::new);
}
 
Example #10
Source File: TestLocalTableWithLowLevelApiEndToEnd.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a task application with a single "PageView" input on the "test" system, an in-memory
 * table named "t1", and {@code MyStreamTask} tasks.
 *
 * @param appDescriptor descriptor that receives the input stream, the table and the task factory
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
  final GenericInputDescriptor<TestTableData.PageView> pageViews =
      testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

  appDescriptor.withInputStream(pageViews);

  // Table keyed by integer with JSON-serialized page view values.
  appDescriptor.withTable(
      new InMemoryTableDescriptor("t1", KVSerde.of(new IntegerSerde(), new TestTableData.PageViewJsonSerde())));

  appDescriptor.withTaskFactory((StreamTaskFactory) MyStreamTask::new);
}
 
Example #11
Source File: TestStreamProcessor.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience constructor: forwards all arguments unchanged to the overload that additionally takes a
 * {@link Duration}, supplying {@link Duration#ZERO} for that trailing argument.
 *
 * @param config forwarded as-is
 * @param customMetricsReporters forwarded as-is
 * @param streamTaskFactory forwarded as-is
 * @param processorListener forwarded as-is
 * @param jobCoordinator forwarded as-is
 * @param container forwarded as-is
 */
public TestableStreamProcessor(Config config,
    Map<String, MetricsReporter> customMetricsReporters,
    StreamTaskFactory streamTaskFactory,
    ProcessorLifecycleListener processorListener,
    JobCoordinator jobCoordinator,
    SamzaContainer container) {
  this(config, customMetricsReporters, streamTaskFactory, processorListener, jobCoordinator, container,
      Duration.ZERO);
}
 
Example #12
Source File: TaskApplicationExample.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Describes a task application on the "tracking" Kafka system with one string input ("myinput"),
 * one string output ("myout"), a RocksDB table ("mytable") and {@code MyStreamTask} tasks.
 *
 * @param appDescriptor descriptor that receives the streams, the table and the task factory
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  final KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  // Input and output streams, both carrying plain strings.
  final KafkaInputDescriptor<String> input = trackingSystem.getInputDescriptor("myinput", new StringSerde());
  final KafkaOutputDescriptor<String> output = trackingSystem.getOutputDescriptor("myout", new StringSerde());

  // Table whose keys and values both use a no-op serde.
  final TableDescriptor table = new RocksDbTableDescriptor("mytable",
      new KVSerde(new NoOpSerde(), new NoOpSerde()));

  appDescriptor.withInputStream(input);
  appDescriptor.withOutputStream(output);
  appDescriptor.withTable(table);
  appDescriptor.withTaskFactory((StreamTaskFactory) MyStreamTask::new);
}
 
Example #13
Source File: TestStreamProcessor.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a testable processor with an injected container and run-loop shutdown duration.
 *
 * <p>The superclass constructor is called with the fixed processor id {@code "TEST_PROCESSOR_ID"}, three
 * empty {@link Optional}s, a listener factory that always returns {@code processorListener}, and a
 * Mockito mock of {@code MetadataStore}. The {@code container} and {@code runLoopShutdownDuration}
 * arguments are then stored on this instance.
 *
 * @param config forwarded to the superclass constructor
 * @param customMetricsReporters forwarded to the superclass constructor
 * @param streamTaskFactory forwarded to the superclass constructor
 * @param processorListener listener returned by the listener factory passed to the superclass
 * @param jobCoordinator forwarded to the superclass constructor
 * @param container stored in the {@code container} field
 * @param runLoopShutdownDuration stored in the {@code runLoopShutdownDuration} field
 */
public TestableStreamProcessor(Config config,
    Map<String, MetricsReporter> customMetricsReporters,
    StreamTaskFactory streamTaskFactory,
    ProcessorLifecycleListener processorListener,
    JobCoordinator jobCoordinator,
    SamzaContainer container,
    Duration runLoopShutdownDuration) {

  // NOTE(review): the meaning of the three Optional.empty() arguments is defined by the superclass
  // constructor, which is not visible here - confirm against StreamProcessor.
  super("TEST_PROCESSOR_ID", config, customMetricsReporters, streamTaskFactory, Optional.empty(), Optional.empty(), Optional.empty(), sp -> processorListener,
      jobCoordinator, Mockito.mock(MetadataStore.class));
  this.container = container;
  this.runLoopShutdownDuration = runLoopShutdownDuration;
}
 
Example #14
Source File: TestStreamProcessor.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the stubs around a {@link StreamProcessor} that uses the given task factory.
 *
 * @param config configuration passed to the processor
 * @param taskFactory task factory passed to the processor
 * @param bootstrapServer forwarded to the single-argument constructor
 */
TestStubs(Config config, StreamTaskFactory taskFactory, String bootstrapServer) {
  // Delegate to the bootstrap-server-only constructor first.
  // NOTE(review): presumably that constructor initializes `listener` - confirm.
  this(bootstrapServer);
  // Processor id "1", no custom metrics reporters (empty map).
  processor = new StreamProcessor("1", config, new HashMap<>(), taskFactory, listener);
}
 
Example #15
Source File: TestZkStreamProcessorBase.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link StreamProcessor} for the given processor id, wired with a lifecycle listener that
 * signals the supplied latches and a {@link JobCoordinator} obtained from the configured factory.
 *
 * <p>Note that {@code map} is mutated: the processor id is stored under
 * {@code ApplicationConfig.PROCESSOR_ID} before the config is built.
 *
 * @param pId processor id; also used in the log messages
 * @param map configuration entries; modified in place to include the processor id
 * @param waitStart latch counted down in {@code afterStart}; may be {@code null}
 * @param waitStop latch counted down in {@code afterStop} (successful shutdown only); may be {@code null}
 * @return the newly created processor (not started)
 */
protected StreamProcessor createStreamProcessor(final String pId, Map<String, String> map, final CountDownLatch waitStart,
    final CountDownLatch waitStop) {
  map.put(ApplicationConfig.PROCESSOR_ID, pId);
  Config config = new MapConfig(map);

  // The coordinator implementation is chosen by the factory class named in the config.
  String jobCoordinatorFactoryClassName = new JobCoordinatorConfig(config).getJobCoordinatorFactoryClassName();
  JobCoordinator jobCoordinator = ReflectionUtil.getObj(jobCoordinatorFactoryClassName, JobCoordinatorFactory.class)
      .getJobCoordinator(pId, config, new MetricsRegistryMap(), Mockito.mock(CoordinatorStreamStore.class));

  ProcessorLifecycleListener listener = new ProcessorLifecycleListener() {
    @Override
    public void beforeStart() {
      // nothing to do before start
    }

    @Override
    public void afterStart() {
      if (waitStart != null) {
        waitStart.countDown();
      }
      LOG.info("afterStart is called for pid=" + pId);
    }

    @Override
    public void afterStop() {
      // stopped w/o failure
      if (waitStop != null) {
        waitStop.countDown();
      }
      LOG.info("afterStop is called for pid=" + pId + " with successful shutdown");
    }

    @Override
    public void afterFailure(Throwable t) {
      // stopped w/ failure; waitStop is intentionally left untouched, as before.
      // Log under the right callback name and keep the cause so failures are diagnosable.
      LOG.info("afterFailure is called for pid=" + pId + " with failure", t);
    }
  };

  return new StreamProcessor(pId, config, new HashMap<>(), (StreamTaskFactory) TestStreamTask::new, listener,
      jobCoordinator);
}
 
Example #16
Source File: TestStreamProcessor.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Tests that a failure in container correctly stops a running JobCoordinator and propagates the exception
 * through the StreamProcessor
 *
 * Assertions:
 * - ProcessorLifecycleListener#afterFailure(Throwable) has been invoked with the Throwable thrown by the run loop
 * - beforeStart and afterStart were invoked, afterStop was not
 */
@Test
public void testContainerFailureCorrectlyStopsProcessor() throws InterruptedException {
  JobCoordinator mockJobCoordinator = Mockito.mock(JobCoordinator.class);
  Throwable expectedThrowable =  new SamzaException("Failure in Container!");
  AtomicReference<Throwable> actualThrowable = new AtomicReference<>();
  final CountDownLatch runLoopStartedLatch = new CountDownLatch(1);

  // Run loop that signals it has started and then fails immediately.
  RunLoop failingRunLoop = mock(RunLoop.class);
  doAnswer(invocation -> {
    runLoopStartedLatch.countDown();
    throw expectedThrowable;
  }).when(failingRunLoop).run();

  SamzaContainer mockContainer = StreamProcessorTestUtils.getDummyContainer(failingRunLoop, mock(StreamTask.class));
  final CountDownLatch processorListenerFailed = new CountDownLatch(1);

  TestableStreamProcessor processor = new TestableStreamProcessor(
      new MapConfig(),
      new HashMap<>(),
      mock(StreamTaskFactory.class),
      new ProcessorLifecycleListener() {
        @Override
        public void beforeStart() {
          processorListenerState.put(ListenerCallback.BEFORE_START, true);
        }

        @Override
        public void afterStart() {
          processorListenerState.put(ListenerCallback.AFTER_START, true);
        }

        @Override
        public void afterStop() {
          processorListenerState.put(ListenerCallback.AFTER_STOP, true);
        }

        @Override
        public void afterFailure(Throwable t) {
          // Record the failure before releasing the main thread so the assertions see it.
          processorListenerState.put(ListenerCallback.AFTER_FAILURE, true);
          actualThrowable.getAndSet(t);
          processorListenerFailed.countDown();
        }
      },
      mockJobCoordinator,
      mockContainer);

  // stop() on the coordinator only releases the latch; the coordinator thread below reacts to it.
  final CountDownLatch coordinatorStop = new CountDownLatch(1);
  doAnswer(invocation -> {
    coordinatorStop.countDown();
    return null;
  }).when(mockJobCoordinator).stop();

  // start() on the coordinator drives the listener callbacks from a separate thread.
  doAnswer(invocation -> {
    new Thread(() -> {
      try {
        processor.jobCoordinatorListener.onJobModelExpired();
        processor.jobCoordinatorListener.onNewJobModel("1", getMockJobModel());
        coordinatorStop.await();
        processor.jobCoordinatorListener.onCoordinatorStop();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }).start();
    return null;
  }).when(mockJobCoordinator).start();

  processor.start();

  // Block until the mock run loop has actually been started, so the failure below
  // is known to originate from the run loop.
  runLoopStartedLatch.await();
  assertTrue(
      "Container failed and processor listener failed was not invoked within timeout!",
      processorListenerFailed.await(30, TimeUnit.SECONDS));
  assertEquals(expectedThrowable, actualThrowable.get());

  assertTrue(processorListenerState.get(ListenerCallback.BEFORE_START));
  assertTrue(processorListenerState.get(ListenerCallback.AFTER_START));
  Assert.assertFalse(processorListenerState.get(ListenerCallback.AFTER_STOP));
  assertTrue(processorListenerState.get(ListenerCallback.AFTER_FAILURE));
}
 
Example #17
Source File: TestStreamProcessor.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Given that the job model expires, but the container takes too long to stop, a TimeoutException should be propagated
 * to the processor lifecycle listener.
 *
 * <p>The test configures a task shutdown timeout of "1" (TaskConfig.TASK_SHUTDOWN_MS) and a run-loop shutdown
 * duration of one second, then triggers a second job-model expiration once the run loop is ready.
 */
@Test
public void testJobModelExpiredContainerShutdownTimeout() throws InterruptedException {
  JobCoordinator mockJobCoordinator = mock(JobCoordinator.class);
  // use this to store the exception passed to afterFailure for the processor lifecycle listener
  AtomicReference<Throwable> afterFailureException = new AtomicReference<>(null);
  TestableStreamProcessor processor = new TestableStreamProcessor(
      // set a small shutdown timeout so it triggers faster
      new MapConfig(ImmutableMap.of(TaskConfig.TASK_SHUTDOWN_MS, "1")),
      new HashMap<>(),
      mock(StreamTaskFactory.class),
      new ProcessorLifecycleListener() {
        @Override
        public void beforeStart() { }

        @Override
        public void afterStart() { }

        // Only the failure callback matters for this test: capture the Throwable for the assertions.
        @Override
        public void afterFailure(Throwable t) {
          afterFailureException.set(t);
        }

        @Override
        public void afterStop() { }
      },
      mockJobCoordinator,
      // NOTE(review): no container is injected; presumably TestableStreamProcessor creates one itself - confirm
      null,
      // take an extra second to shut down so that task shutdown timeout gets reached
      Duration.of(1, ChronoUnit.SECONDS));

  // Thread that plays the role of the job coordinator; it is started from the mocked start() below.
  Thread jcThread = new Thread(() -> {
    // gets processor into rebalance mode so onNewJobModel creates a new container
    processor.jobCoordinatorListener.onJobModelExpired();
    processor.jobCoordinatorListener.onNewJobModel("1", getMockJobModel());
    try {
      // wait for the run loop to be ready before triggering rebalance
      processor.runLoopStartForMain.await();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
    // second expiration: this is the one expected to hit the shutdown timeout
    processor.jobCoordinatorListener.onJobModelExpired();
  });
  doAnswer(invocation -> {
    jcThread.start();
    return null;
  }).when(mockJobCoordinator).start();

  // ensure that the coordinator stop occurred before checking the exception being thrown
  CountDownLatch coordinatorStop = new CountDownLatch(1);
  doAnswer(invocation -> {
    // notify the processor first, then release the main thread
    processor.jobCoordinatorListener.onCoordinatorStop();
    coordinatorStop.countDown();
    return null;
  }).when(mockJobCoordinator).stop();

  processor.start();

  // make sure the job model expired callback completed
  assertTrue("Job coordinator stop not called", coordinatorStop.await(10, TimeUnit.SECONDS));
  assertNotNull(afterFailureException.get());
  assertTrue(afterFailureException.get() instanceof TimeoutException);
}
 
Example #18
Source File: TestStreamProcessor.java    From samza with Apache License 2.0 4 votes vote down vote up
/**
 * Tests stop() method when Container AND JobCoordinator are running.
 *
 * <p>Expected callbacks: beforeStart, afterStart and afterStop are invoked; afterFailure is not.
 */
@Test
public void testStopByProcessor() throws InterruptedException {
  JobCoordinator mockJobCoordinator = mock(JobCoordinator.class);

  final CountDownLatch processorListenerStop = new CountDownLatch(1);
  final CountDownLatch processorListenerStart = new CountDownLatch(1);

  TestableStreamProcessor processor = new TestableStreamProcessor(
      new MapConfig(),
      new HashMap<>(),
      mock(StreamTaskFactory.class),
      new ProcessorLifecycleListener() {
        @Override
        public void afterStart() {
          processorListenerState.put(ListenerCallback.AFTER_START, true);
          processorListenerStart.countDown();
        }

        @Override
        public void afterFailure(Throwable t) {
          processorListenerState.put(ListenerCallback.AFTER_FAILURE, true);
        }

        @Override
        public void afterStop() {
          processorListenerState.put(ListenerCallback.AFTER_STOP, true);
          processorListenerStop.countDown();
        }

        @Override
        public void beforeStart() {
          processorListenerState.put(ListenerCallback.BEFORE_START, true);
        }
      },
      mockJobCoordinator,
      null);

  // Thread that plays the role of the job coordinator; it is started from the mocked start() below
  // and waits for the mocked stop() before reporting the coordinator as stopped.
  final CountDownLatch coordinatorStop = new CountDownLatch(1);
  final Thread jcThread = new Thread(() -> {
    try {
      processor.jobCoordinatorListener.onJobModelExpired();
      processor.jobCoordinatorListener.onNewJobModel("1", getMockJobModel());
      coordinatorStop.await();
      processor.jobCoordinatorListener.onCoordinatorStop();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }
  });

  doAnswer(invocation -> {
    coordinatorStop.countDown();
    return null;
  }).when(mockJobCoordinator).stop();

  doAnswer(invocation -> {
    jcThread.start();
    return null;
  }).when(mockJobCoordinator).start();

  processor.start();
  // Fail right here if afterStart never fires, instead of silently continuing after the timeout.
  assertTrue(
      "Processor listener afterStart was not invoked within timeout!",
      processorListenerStart.await(10, TimeUnit.SECONDS));

  assertEquals(SamzaContainerStatus.STARTED, processor.getContainerStatus());

  // Block until the mock run loop has actually started.
  // Otherwise, processor.stop gets triggered before the mock run loop begins to block
  processor.runLoopStartForMain.await();

  processor.stop();

  processorListenerStop.await();

  // Assertions on which callbacks are expected to be invoked
  assertTrue(processorListenerState.get(ListenerCallback.BEFORE_START));
  assertTrue(processorListenerState.get(ListenerCallback.AFTER_START));
  assertTrue(processorListenerState.get(ListenerCallback.AFTER_STOP));
  Assert.assertFalse(processorListenerState.get(ListenerCallback.AFTER_FAILURE));
}