org.apache.samza.system.SystemStream Java Examples

The following examples show how to use org.apache.samza.system.SystemStream. Each example is attributed to its original project and source file.
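As a quick primer before the examples: a SystemStream is an immutable pair of a system name (identifying a configured system, such as "kafka") and a stream name (the topic or queue within that system). A minimal sketch of direct usage, with made-up stream names:

SystemStream pageViews = new SystemStream("kafka", "page-views");
pageViews.getSystem();   // "kafka"
pageViews.getStream();   // "page-views"
// SystemStream implements equals() and hashCode(), so it can serve as a map key,
// which several of the examples below rely on:
Map<SystemStream, Integer> producerCounts = new HashMap<>();
producerCounts.put(pageViews, 8);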
Example #1
Source File: TestWindowOperator.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl getKeyedSessionWindowStreamGraph(AccumulationMode mode, Duration duration) throws IOException {
  StreamApplication userApp = appDesc -> {
    KVSerde<Integer, Integer> kvSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor sd = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("integers", kvSerde);
    appDesc.getInputStream(inputDescriptor)
        .window(Windows.keyedSessionWindow(KV::getKey, duration, new IntegerSerde(), kvSerde)
            .setAccumulationMode(mode), "w1")
        .sink((message, messageCollector, taskCoordinator) -> {
          SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream");
          messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message));
        });
  };

  return new StreamApplicationDescriptorImpl(userApp, config);
}
 
Example #2
Source File: TestWindowOperator.java    From samza with Apache License 2.0
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode, Duration timeDuration,
      Trigger<IntegerEnvelope> earlyTrigger) throws IOException {
  StreamApplication userApp = appDesc -> {
    KVSerde<Integer, Integer> kvSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    GenericSystemDescriptor sd = new GenericSystemDescriptor("kafka", "mockFactoryClass");
    GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("integers", kvSerde);
    MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(inputDescriptor);

    integers
        .map(new KVMapFunction())
        .window(Windows.<IntegerEnvelope, Integer>tumblingWindow(timeDuration, () -> 0, (m, c) -> c + 1, new IntegerSerde())
            .setEarlyTrigger(earlyTrigger)
            .setAccumulationMode(mode), "w1")
        .sink((message, messageCollector, taskCoordinator) -> {
          SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream");
          messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message));
        });
  };

  return new StreamApplicationDescriptorImpl(userApp, config);
}
 
Example #3
Source File: TestCoordinatorStreamSystemConsumer.java    From samza with Apache License 2.0
@Test
public void testCoordinatorStreamSystemConsumerRegisterOnceOnly() throws Exception {
  Map<String, String> expectedConfig = new LinkedHashMap<String, String>();
  expectedConfig.put("job.id", "1234");
  SystemStream systemStream = new SystemStream("system", "stream");
  MockSystemConsumer systemConsumer = new MockSystemConsumer(new SystemStreamPartition(systemStream, new Partition(0)));
  CoordinatorStreamSystemConsumer consumer = new CoordinatorStreamSystemConsumer(systemStream, systemConsumer, new SinglePartitionWithoutOffsetsSystemAdmin());
  assertEquals(0, systemConsumer.getRegisterCount());
  consumer.register();
  assertEquals(1, systemConsumer.getRegisterCount());
  assertFalse(systemConsumer.isStarted());
  consumer.start();
  assertTrue(systemConsumer.isStarted());
  consumer.register();
  assertEquals(1, systemConsumer.getRegisterCount());
}
 
Example #4
Source File: AbandonedCartStreamTask.java    From Unified-Log-Processing with Apache License 2.0
@Override
public void window(MessageCollector collector,
  TaskCoordinator coordinator) {

  KeyValueIterator<String, String> entries = store.all();
  while (entries.hasNext()) {                              // scan every entry in the store
    Entry<String, String> entry = entries.next();
    String key = entry.getKey();
    String value = entry.getValue();
    if (isTimestampKey(key) && Cart.isAbandoned(value)) {  // timestamp entry indicating an abandoned cart?
      String shopper = extractShopper(key);
      String cart = store.get(asCartKey(shopper));

      AbandonedCartEvent event =
        new AbandonedCartEvent(shopper, cart);
      collector.send(new OutgoingMessageEnvelope(
        new SystemStream("kafka", "derived-events-ch04"), event));  // emit the derived event

      resetShopper(shopper);
    }
  }
  entries.close();  // release the store iterator; KeyValueIterators must be closed
}
 
Example #5
Source File: WatermarkStates.java    From samza with Apache License 2.0
WatermarkStates(
    Set<SystemStreamPartition> ssps,
    Map<SystemStream, Integer> producerTaskCounts,
    MetricsRegistry metricsRegistry) {
  final Map<SystemStreamPartition, WatermarkState> states = new HashMap<>();
  final List<SystemStreamPartition> intSsps = new ArrayList<>();

  ssps.forEach(ssp -> {
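    // seed this SSP's watermark state with its stream's producer task count;
    // SSPs with a nonzero producer count are tracked as intermediate SSPs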
    final int producerCount = producerTaskCounts.getOrDefault(ssp.getSystemStream(), 0);
    states.put(ssp, new WatermarkState(producerCount));
    if (producerCount != 0) {
      intSsps.add(ssp);
    }
  });
  this.watermarkStates = Collections.unmodifiableMap(states);
  this.watermarkMetrics = new WatermarkMetrics(metricsRegistry);
  this.intermediateSsps = Collections.unmodifiableList(intSsps);
}
 
Example #6
Source File: TestCoordinatorStreamSystemConsumer.java    From samza with Apache License 2.0
@Test
public void testCoordinatorStreamSystemConsumer() {
  Map<String, String> expectedConfig = new LinkedHashMap<String, String>();
  expectedConfig.put("job.id", "1234");
  SystemStream systemStream = new SystemStream("system", "stream");
  MockSystemConsumer systemConsumer = new MockSystemConsumer(new SystemStreamPartition(systemStream, new Partition(0)));
  CoordinatorStreamSystemConsumer consumer = new CoordinatorStreamSystemConsumer(systemStream, systemConsumer, new SinglePartitionWithoutOffsetsSystemAdmin());
  assertEquals(0, systemConsumer.getRegisterCount());
  consumer.register();
  assertEquals(1, systemConsumer.getRegisterCount());
  assertFalse(systemConsumer.isStarted());
  consumer.start();
  assertTrue(systemConsumer.isStarted());
  try {
    consumer.getConfig();
    fail("Should have failed when retrieving config before bootstrapping.");
  } catch (SamzaException e) {
    // Expected.
  }
  consumer.bootstrap();
  assertEquals(expectedConfig, consumer.getConfig());
  assertFalse(systemConsumer.isStopped());
  consumer.stop();
  assertTrue(systemConsumer.isStopped());
}
 
Example #7
Source File: DiagnosticsManager.java    From samza with Apache License 2.0
public DiagnosticsManager(String jobName,
    String jobId,
    Map<String, ContainerModel> containerModels,
    int containerMemoryMb,
    int containerNumCores,
    int numPersistentStores,
    long maxHeapSizeBytes,
    int containerThreadPoolSize,
    String containerId,
    String executionEnvContainerId,
    String taskClassVersion,
    String samzaVersion,
    String hostname,
    SystemStream diagnosticSystemStream,
    SystemProducer systemProducer,
    Duration terminationDuration, boolean autosizingEnabled) {

  this(jobName, jobId, containerModels, containerMemoryMb, containerNumCores, numPersistentStores, maxHeapSizeBytes, containerThreadPoolSize,
      containerId, executionEnvContainerId, taskClassVersion, samzaVersion, hostname, diagnosticSystemStream, systemProducer,
      terminationDuration, Executors.newSingleThreadScheduledExecutor(
          new ThreadFactoryBuilder().setNameFormat(PUBLISH_THREAD_NAME).setDaemon(true).build()), autosizingEnabled);
}
 
Example #8
Source File: TestApplicationMasterRestClient.java    From samza with Apache License 2.0
private HashMap<String, ContainerModel> generateContainers() {
  Set<TaskModel> taskModels = ImmutableSet.of(
      new TaskModel(new TaskName("task1"),
                    ImmutableSet.of(new SystemStreamPartition(new SystemStream("system1", "stream1"), new Partition(0))),
                    new Partition(0)),
      new TaskModel(new TaskName("task2"),
          ImmutableSet.of(new SystemStreamPartition(new SystemStream("system1", "stream1"), new Partition(1))),
          new Partition(1)));
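  // distribute the two tasks across two containers (one task per container)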
  GroupByContainerCount grouper = new GroupByContainerCount(2);
  Set<ContainerModel> containerModels = grouper.group(taskModels);
  HashMap<String, ContainerModel> containers = new HashMap<>();
  for (ContainerModel containerModel : containerModels) {
    containers.put(containerModel.getId(), containerModel);
  }
  return containers;
}
 
Example #9
Source File: TransactionalStateTaskRestoreManager.java    From samza with Apache License 2.0
public TransactionalStateTaskRestoreManager(
    TaskModel taskModel,
    Map<String, StorageEngine> storeEngines,
    Map<String, SystemStream> storeChangelogs,
    SystemAdmins systemAdmins,
    Map<String, SystemConsumer> storeConsumers,
    SSPMetadataCache sspMetadataCache,
    File loggedStoreBaseDirectory,
    File nonLoggedStoreBaseDirectory,
    Config config,
    Clock clock) {
  this.taskModel = taskModel;
  this.storeEngines = storeEngines;
  this.storeChangelogs = storeChangelogs;
  this.systemAdmins = systemAdmins;
  this.storeConsumers = storeConsumers;
  // OK to use SSPMetadataCache here since, unlike during commit, the newest changelog SSP offsets
  // will not change between cache initialization and restore completion
  this.sspMetadataCache = sspMetadataCache;
  this.loggedStoreBaseDirectory = loggedStoreBaseDirectory;
  this.nonLoggedStoreBaseDirectory = nonLoggedStoreBaseDirectory;
  this.config = config;
  this.clock = clock;
  this.storageManagerUtil = new StorageManagerUtil();
  this.fileUtil = new FileUtil();
}
 
Example #10
Source File: StorageManagerUtil.java    From samza with Apache License 2.0
/**
 * The directory loggedStoreDir associated with the store storeName is valid if all of the following
 * conditions are true:
 * a) the store is a persistent store;
 * b) there is a valid offset file associated with the store;
 * c) the store has not gone stale.
 *
 * @return true if the logged store is valid, false otherwise.
 */
public boolean isLoggedStoreValid(String storeName, File loggedStoreDir, Config config,
    Map<String, SystemStream> storeChangelogs, TaskModel taskModel, Clock clock, Map<String, StorageEngine> taskStores) {
  long changeLogDeleteRetentionInMs = new StorageConfig(config).getChangeLogDeleteRetentionInMs(storeName);

  if (storeChangelogs.containsKey(storeName)) {
    SystemStreamPartition changelogSSP = new SystemStreamPartition(
        storeChangelogs.get(storeName), taskModel.getChangelogPartition());

    return taskStores.get(storeName).getStoreProperties().isPersistedToDisk()
        && isOffsetFileValid(loggedStoreDir, Collections.singleton(changelogSSP), false)
        && !isStaleStore(loggedStoreDir, changeLogDeleteRetentionInMs, clock.currentTimeMillis(), false);
  }

  return false;
}
 
Example #11
Source File: ContainerStorageManager.java    From samza with Apache License 2.0
/**
 * For each standby task, we remove its changelog SSPs from the changelog SSP map and add them to the
 * task's taskSideInputSSPs. The task's side-input manager will consume and restore these as well.
 *
 * @param containerModel the container's model
 * @param changelogSystemStreams the passed-in map of store name to changelog SystemStream
 * @return a map of store name to changelog SystemStream covering only the stores of active tasks,
 *         assuming no two stores share the same changelog SSP
 */
private Map<String, SystemStream> getChangelogSystemStreams(ContainerModel containerModel, Map<String, SystemStream> changelogSystemStreams) {

  if (MapUtils.invertMap(changelogSystemStreams).size() != changelogSystemStreams.size()) {
    throw new SamzaException("Two stores cannot have the same changelog system-stream");
  }

  Map<SystemStreamPartition, String> changelogSSPToStore = new HashMap<>();
  changelogSystemStreams.forEach((storeName, systemStream) ->
      containerModel.getTasks().forEach((taskName, taskModel) -> { changelogSSPToStore.put(new SystemStreamPartition(systemStream, taskModel.getChangelogPartition()), storeName); })
  );

  getTasks(containerModel, TaskMode.Standby).forEach((taskName, taskModel) -> {
    this.taskSideInputStoreSSPs.putIfAbsent(taskName, new HashMap<>());
    changelogSystemStreams.forEach((storeName, systemStream) -> {
      SystemStreamPartition ssp = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());
      changelogSSPToStore.remove(ssp);
      this.taskSideInputStoreSSPs.get(taskName).put(storeName, Collections.singleton(ssp));
    });
  });

  // changelogSystemStreams correspond only to active tasks (since those of standby-tasks moved to sideInputs above)
  return MapUtils.invertMap(changelogSSPToStore).entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, x -> x.getValue().getSystemStream()));
}
 
Example #12
Source File: TransactionalStateTaskRestoreManager.java    From samza with Apache License 2.0
@Override
public void restore() throws InterruptedException {
  Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;

  for (Map.Entry<String, RestoreOffsets> entry : storesToRestore.entrySet()) {
    String storeName = entry.getKey();
    String endOffset = entry.getValue().endingOffset;
    SystemStream systemStream = storeChangelogs.get(storeName);
    SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(systemStream.getSystem());
    SystemConsumer systemConsumer = storeConsumers.get(storeName);
    SystemStreamPartition changelogSSP = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());

    ChangelogSSPIterator changelogSSPIterator =
        new ChangelogSSPIterator(systemConsumer, changelogSSP, endOffset, systemAdmin, true);
    StorageEngine taskStore = storeEngines.get(storeName);

    LOG.info("Restoring store: {} for task: {}", storeName, taskModel.getTaskName());
    taskStore.restore(changelogSSPIterator);
  }
}
 
Example #13
Source File: AzureJobCoordinator.java    From samza with Apache License 2.0
/**
 * For each input stream specified in config, determine its partitions and
 * return a set of SystemStreamPartitions covering them all.
 */
private Set<SystemStreamPartition> getInputStreamPartitions() {
  TaskConfig taskConfig = new TaskConfig(config);
  scala.collection.immutable.Set<SystemStream> inputSystemStreams =
      JavaConverters.asScalaSetConverter(taskConfig.getInputStreams()).asScala().toSet();

  // Get the set of partitions for each SystemStream from the stream metadata
  Set<SystemStreamPartition>
      sspSet = JavaConverters.mapAsJavaMapConverter(streamMetadataCache.getStreamMetadata(inputSystemStreams, true)).asJava()
      .entrySet()
      .stream()
      .flatMap(this::mapSSMToSSP)
      .collect(Collectors.toSet());

  return sspSet;
}
 
Example #14
Source File: TaskConfig.java    From samza with Apache License 2.0
/**
 * Get the input streams, not including the broadcast streams. Use {@link #getAllInputStreams()} to also get the
 * broadcast streams.
 */
public Set<SystemStream> getInputStreams() {
  Optional<String> inputStreams = Optional.ofNullable(get(INPUT_STREAMS));
  if (!inputStreams.isPresent() || inputStreams.get().isEmpty()) {
    return Collections.emptySet();
  } else {
    return Stream.of(inputStreams.get().split(","))
        .map(systemStreamNames -> StreamUtil.getSystemStreamFromNames(systemStreamNames.trim()))
        .collect(Collectors.toSet());
  }
}
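For illustration, a minimal sketch of the parsing behavior (the stream names are made up, and this assumes INPUT_STREAMS resolves to the task.inputs key):

Config config = new MapConfig(ImmutableMap.of("task.inputs", "kafka.page-views, kafka.ad-clicks"));
Set<SystemStream> inputs = new TaskConfig(config).getInputStreams();
// inputs contains SystemStream("kafka", "page-views") and SystemStream("kafka", "ad-clicks")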
 
Example #15
Source File: OperatorImplGraph.java    From samza with Apache License 2.0
/**
 * Calculates the mapping from output streams to input streams.
 * @param specGraph the user {@link OperatorSpecGraph}
 * @param streamConfig the stream config, used to resolve stream ids to system-streams
 * @return the mapping from output streams to input streams
 */
static Multimap<SystemStream, SystemStream> getIntermediateToInputStreamsMap(
  OperatorSpecGraph specGraph, StreamConfig streamConfig) {
  Multimap<SystemStream, SystemStream> outputToInputStreams = HashMultimap.create();
  specGraph.getInputOperators().entrySet().stream()
      .forEach(entry -> {
        SystemStream systemStream = streamConfig.streamIdToSystemStream(entry.getKey());
        computeOutputToInput(systemStream, entry.getValue(), outputToInputStreams, streamConfig);
      });
  return outputToInputStreams;
}
 
Example #16
Source File: TestTransactionalStateTaskStorageManager.java    From samza with Apache License 2.0
/**
 * This should never happen with CheckpointingTaskStorageManager. #getNewestChangelogSSPOffset must
 * return a key for every changelog SSP. If the SSP is empty, the value should be none. If it could
 * not fetch metadata, it should throw an exception instead of skipping the SSP.
 * If this contract is accidentally broken, ensure that we fail the commit.
 */
@Test(expected = SamzaException.class)
public void testWriteChangelogOffsetFilesWithNoChangelogOffset() throws IOException {
  String storeName = "mockStore";
  ContainerStorageManager csm = mock(ContainerStorageManager.class);
  StorageEngine mockStore = mock(StorageEngine.class);
  java.util.Map<String, StorageEngine> taskStores = ImmutableMap.of(storeName, mockStore);
  when(csm.getAllStores(any())).thenReturn(taskStores);

  Partition changelogPartition = new Partition(0);
  SystemStream changelogSS = new SystemStream("system", "changelog");
  SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSS, changelogPartition);
  TransactionalStateTaskStorageManager tsm = spy(buildTSM(csm, changelogPartition, new StorageManagerUtil()));

  // no mapping present for changelog newest offset
  Map<SystemStreamPartition, Option<String>> offsets = ScalaJavaUtil.toScalaMap(ImmutableMap.of());

  Path checkpointPath = Files.createTempDirectory("store-checkpoint-test").toAbsolutePath();
  Map<String, Path> checkpointPaths = ScalaJavaUtil.toScalaMap(
      ImmutableMap.of(storeName, checkpointPath));
  Map<String, SystemStream> storeChangelogs = ScalaJavaUtil.toScalaMap(
      ImmutableMap.of(storeName, changelogSS));

  // invoke method
  tsm.writeChangelogOffsetFiles(checkpointPaths, storeChangelogs, offsets);

  fail("Should have thrown an exception if no changelog offset found for checkpointed store");
}
 
Example #17
Source File: OperatorImpl.java    From samza with Apache License 2.0
/**
 * Aggregate {@link EndOfStreamMessage} from each ssp of the stream.
 * Invoke onEndOfStream() if the stream reaches the end.
 * @param eos {@link EndOfStreamMessage} object
 * @param ssp system stream partition
 * @param collector message collector
 * @param coordinator task coordinator
 */
public final CompletionStage<Void> aggregateEndOfStream(EndOfStreamMessage eos, SystemStreamPartition ssp, MessageCollector collector,
    TaskCoordinator coordinator) {
  LOG.info("Received end-of-stream message from task {} in {}", eos.getTaskName(), ssp);
  eosStates.update(eos, ssp);

  SystemStream stream = ssp.getSystemStream();
  CompletionStage<Void> endOfStreamFuture = CompletableFuture.completedFuture(null);

  if (eosStates.isEndOfStream(stream)) {
    LOG.info("Input {} reaches the end for task {}", stream.toString(), taskName.getTaskName());
    if (eos.getTaskName() != null) {
      // This is the aggregation task, which already received all the eos messages from upstream
      // broadcast the end-of-stream to all the peer partitions
      controlMessageSender.broadcastToOtherPartitions(new EndOfStreamMessage(), ssp, collector);
    }

    // populate the end-of-stream through the dag
    endOfStreamFuture = onEndOfStream(collector, coordinator)
        .thenAccept(result -> {
          if (eosStates.allEndOfStream()) {
            // all inputs have been end-of-stream, shut down the task
            LOG.info("All input streams have reached the end for task {}", taskName.getTaskName());
            coordinator.commit(TaskCoordinator.RequestScope.CURRENT_TASK);
            coordinator.shutdown(TaskCoordinator.RequestScope.CURRENT_TASK);
          }
        });
  }

  return endOfStreamFuture;
}
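Read together with Examples #25 and #26 below, this shows the two-phase end-of-stream protocol: each upstream task sends its EndOfStreamMessage to a single aggregation partition; once that partition has seen a message from every producer, it re-broadcasts end-of-stream to the peer partitions, and when every input has ended, the task commits and shuts down.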
 
Example #18
Source File: TaskConfig.java    From samza with Apache License 2.0
/**
 * Get the SystemStreams for the configured broadcast streams.
 *
 * @return the set of SystemStreams for which there are broadcast stream SSPs configured.
 */
public Set<SystemStream> getBroadcastSystemStreams() {
  Set<SystemStream> broadcastSS = new HashSet<>();
  Set<SystemStreamPartition> broadcastSSPs = getBroadcastSystemStreamPartitions();
  for (SystemStreamPartition bssp : broadcastSSPs) {
    broadcastSS.add(bssp.getSystemStream());
  }
  return Collections.unmodifiableSet(broadcastSS);
}
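A minimal sketch of the collapse from partition-scoped config entries to their parent streams, using the same config format as Example #28 below:

Config config = new MapConfig(ImmutableMap.of(TaskConfig.BROADCAST_INPUT_STREAMS, "kafka.foo#4, kafka.foo#5"));
Set<SystemStream> broadcast = new TaskConfig(config).getBroadcastSystemStreams();
// both SSPs belong to the same stream, so the result is the single SystemStream("kafka", "foo")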
 
Example #19
Source File: TestTaskSideInputHandler.java    From samza with Apache License 2.0
/**
 * This test covers cases when calls to a SystemAdmin's (e.g., KafkaSystemAdmin's) get-stream-metadata method return null.
 */
@Test
public void testGetStartingOffsetsWhenStreamMetadataIsNull() {
  final String taskName = "test-get-starting-offset-task";

  Set<SystemStreamPartition> ssps = IntStream.range(1, 2)
      .mapToObj(idx -> new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(idx)))
      .collect(Collectors.toSet());
  Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = ssps.stream()
      .collect(Collectors.toMap(SystemStreamPartition::getPartition,
        x -> new SystemStreamMetadata.SystemStreamPartitionMetadata(null, "1", "2")));


  TaskSideInputHandler handler = new MockTaskSideInputHandlerBuilder(taskName, TaskMode.Active)
      .addStreamMetadata(Collections.singletonMap(new SystemStream(TEST_SYSTEM, TEST_STREAM),
          new SystemStreamMetadata(TEST_STREAM, partitionMetadata)))
      .addStore(TEST_STORE, ssps)
      .build();

  handler.init();

  ssps.forEach(ssp -> {
    String startingOffset = handler.getStartingOffset(
        new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, ssp.getPartition()));
    Assert.assertNull("Starting offset should be null", startingOffset);
  });
}
 
Example #20
Source File: StreamConfig.java    From samza with Apache License 2.0
public boolean getResetOffset(SystemStream systemStream) {
  String resetOffset = getSamzaProperty(systemStream, CONSUMER_RESET_OFFSET, "false");
  if (!resetOffset.equalsIgnoreCase("true") && !resetOffset.equalsIgnoreCase("false")) {
    LOG.warn("Got a .samza.reset.offset configuration for SystemStream {} that is not true or false (was {})." +
      " Defaulting to false.", systemStream, resetOffset);

    resetOffset = "false";
  }
  return Boolean.valueOf(resetOffset);
}
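Note the lenient parse: any value other than the literal strings true and false is logged with a warning and coerced to false, so a misspelled reset flag silently leaves offset resetting disabled rather than failing the job.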
 
Example #21
Source File: TestTransactionalStateTaskStorageManager.java    From samza with Apache License 2.0
private TransactionalStateTaskStorageManager buildTSM(ContainerStorageManager csm, Partition changelogPartition,
    StorageManagerUtil smu) {
  TaskName taskName = new TaskName("Partition 0");
  Map<String, SystemStream> changelogSystemStreams = mock(Map.class);
  SystemAdmins systemAdmins = mock(SystemAdmins.class);
  File loggedStoreBaseDir = mock(File.class);
  TaskMode taskMode = TaskMode.Active;

  return new TransactionalStateTaskStorageManager(
      taskName, csm, changelogSystemStreams, systemAdmins,
      loggedStoreBaseDir, changelogPartition, taskMode, smu);
}
 
Example #22
Source File: NonTransactionalStateTaskRestoreManager.java    From samza with Apache License 2.0
/**
 *  Validates each changelog system-stream with its respective SystemAdmin.
 */
private void validateChangelogStreams() {
  LOG.info("Validating change log streams: " + changelogSystemStreams);

  for (SystemStream changelogSystemStream : changelogSystemStreams.values()) {
    SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(changelogSystemStream.getSystem());
    StreamSpec changelogSpec =
        StreamSpec.createChangeLogStreamSpec(changelogSystemStream.getStream(), changelogSystemStream.getSystem(),
            maxChangeLogStreamPartitions);

    systemAdmin.validateStream(changelogSpec);
  }
}
 
Example #23
Source File: StreamApplicationIntegrationTest.java    From samza with Apache License 2.0
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor("test");
  KafkaInputDescriptor<KV<String, PageView>> isd =
      ksd.getInputDescriptor("PageView", KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()));
  MessageStream<KV<String, TestData.PageView>> inputStream = appDescriptor.getInputStream(isd);
  inputStream
      .map(KV::getValue)
      .partitionBy(PageView::getMemberId, pv -> pv, KVSerde.of(new IntegerSerde(), new JsonSerdeV2<>(PageView.class)), "p1")
      .sink((m, collector, coordinator) ->
          collector.send(new OutgoingMessageEnvelope(new SystemStream("test", "Output"), m.getKey(), m.getKey(), m)));
}
 
Example #24
Source File: TestTransactionalStateTaskStorageManager.java    From samza with Apache License 2.0
@Test
public void testWriteChangelogOffsetFiles() throws IOException {
  String storeName = "mockStore";
  ContainerStorageManager csm = mock(ContainerStorageManager.class);
  StorageEngine mockStore = mock(StorageEngine.class);
  java.util.Map<String, StorageEngine> taskStores = ImmutableMap.of(storeName, mockStore);
  when(csm.getAllStores(any())).thenReturn(taskStores);

  Partition changelogPartition = new Partition(0);
  SystemStream changelogSS = new SystemStream("system", "changelog");
  SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSS, changelogPartition);
  StorageManagerUtil smu = spy(new StorageManagerUtil());
  File mockCurrentStoreDir = mock(File.class);
  doReturn(mockCurrentStoreDir).when(smu).getTaskStoreDir(any(), eq(storeName), any(), any());
  doNothing().when(smu).writeOffsetFile(eq(mockCurrentStoreDir), any(), anyBoolean());
  TransactionalStateTaskStorageManager tsm = spy(buildTSM(csm, changelogPartition, smu));

  String changelogNewestOffset = "1";
  Map<SystemStreamPartition, Option<String>> offsets = ScalaJavaUtil.toScalaMap(
      ImmutableMap.of(changelogSSP, Option.apply(changelogNewestOffset)));

  Path checkpointPath = Files.createTempDirectory("store-checkpoint-test").toAbsolutePath();

  Map<String, Path> checkpointPaths = ScalaJavaUtil.toScalaMap(
      ImmutableMap.of(storeName, checkpointPath));
  Map<String, SystemStream> storeChangelogs = ScalaJavaUtil.toScalaMap(
      ImmutableMap.of(storeName, changelogSS));

  // invoke method
  tsm.writeChangelogOffsetFiles(checkpointPaths, storeChangelogs, offsets);

  // verify that offset file was written to the checkpoint dir
  java.util.Map<SystemStreamPartition, String> fileOffsets = new StorageManagerUtil()
      .readOffsetFile(checkpointPath.toFile(), ImmutableSet.of(changelogSSP), false);
  assertEquals(1, fileOffsets.size());
  assertEquals(changelogNewestOffset, fileOffsets.get(changelogSSP));

  // verify that offset file write was called on the current dir
  verify(smu, times(1)).writeOffsetFile(eq(mockCurrentStoreDir), any(), anyBoolean());
}
 
Example #25
Source File: ControlMessageSender.java    From samza with Apache License 2.0
void broadcastToOtherPartitions(ControlMessage message, SystemStreamPartition ssp, MessageCollector collector) {
  SystemStream systemStream = ssp.getSystemStream();
  int partitionCount = getPartitionCount(systemStream);
  int currentPartition = ssp.getPartition().getPartitionId();
  for (int i = 0; i < partitionCount; i++) {
    if (i != currentPartition) {
      OutgoingMessageEnvelope envelopeOut = new OutgoingMessageEnvelope(systemStream, i, null, message);
      collector.send(envelopeOut);
    }
  }
}
 
Example #26
Source File: ControlMessageSender.java    From samza with Apache License 2.0
void send(ControlMessage message, SystemStream systemStream, MessageCollector collector) {
  int partitionCount = getPartitionCount(systemStream);
  // We pick a partition based on topic hashcode to aggregate the control messages from upstream tasks
  // After aggregation the task will broadcast the results to other partitions
  int aggregatePartition = systemStream.getStream().hashCode() % partitionCount;

  LOG.debug(String.format("Send %s message from task %s to %s partition %s for aggregation",
      MessageType.of(message).name(), message.getTaskName(), systemStream, aggregatePartition));

  OutgoingMessageEnvelope envelopeOut = new OutgoingMessageEnvelope(systemStream, aggregatePartition, null, message);
  collector.send(envelopeOut);
}
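Hashing the stream name makes the choice of aggregation partition deterministic, so all upstream tasks funnel their control messages to the same partition without any extra coordination; the aggregating task then re-broadcasts the result via broadcastToOtherPartitions (Example #25).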
 
Example #27
Source File: NonTransactionalStateTaskRestoreManager.java    From samza with Apache License 2.0
/**
 * Builds a map from changelog SystemStream to its oldest offset for the given partition.
 */
private Map<SystemStream, String> getChangeLogOldestOffsetsForPartition(Partition partition,
    Map<SystemStream, SystemStreamMetadata> inputStreamMetadata) {

  Map<SystemStream, String> retVal = new HashMap<>();

  // NOTE: do not use Collectors.Map because of https://bugs.openjdk.java.net/browse/JDK-8148463
  inputStreamMetadata.entrySet()
      .stream()
      .filter(x -> x.getValue().getSystemStreamPartitionMetadata().get(partition) != null)
      .forEach(e -> retVal.put(e.getKey(),
          e.getValue().getSystemStreamPartitionMetadata().get(partition).getOldestOffset()));

  return retVal;
}
 
Example #28
Source File: TestTaskConfig.java    From samza with Apache License 2.0
@Test
public void testGetBroadcastSystemStreams() {
  Config config = new MapConfig(ImmutableMap.of(TaskConfig.BROADCAST_INPUT_STREAMS,
      "kafka.foo#4, kafka.bar#5, otherKafka.foo#4, otherKafka.foo.bar#5"));
  Set<SystemStream> expected = ImmutableSet.of(
      new SystemStream("kafka", "foo"),
      new SystemStream("kafka", "bar"),
      new SystemStream("otherKafka", "foo"),
      new SystemStream("otherKafka", "foo.bar"));
  assertEquals(expected, new TaskConfig(config).getBroadcastSystemStreams());
  assertTrue(new TaskConfig(new MapConfig()).getBroadcastSystemStreams().isEmpty());
}
 
Example #29
Source File: TestControlMessageSender.java    From samza with Apache License 2.0
@Test
public void testBroadcast() {
  SystemStreamMetadata metadata = mock(SystemStreamMetadata.class);
  Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
  partitionMetadata.put(new Partition(0), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
  partitionMetadata.put(new Partition(1), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
  partitionMetadata.put(new Partition(2), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
  partitionMetadata.put(new Partition(3), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
  when(metadata.getSystemStreamPartitionMetadata()).thenReturn(partitionMetadata);
  StreamMetadataCache metadataCache = mock(StreamMetadataCache.class);
  when(metadataCache.getSystemStreamMetadata(anyObject(), anyBoolean())).thenReturn(metadata);

  SystemStream systemStream = new SystemStream("test-system", "test-stream");
  Set<Integer> partitions = new HashSet<>();
  MessageCollector collector = mock(MessageCollector.class);
  doAnswer(invocation -> {
    OutgoingMessageEnvelope envelope = (OutgoingMessageEnvelope) invocation.getArguments()[0];
    partitions.add((Integer) envelope.getPartitionKey());
    assertEquals(envelope.getSystemStream(), systemStream);
    return null;
  }).when(collector).send(any());

  ControlMessageSender sender = new ControlMessageSender(metadataCache);
  WatermarkMessage watermark = new WatermarkMessage(System.currentTimeMillis(), "task 0");
  SystemStreamPartition ssp = new SystemStreamPartition(systemStream, new Partition(0));
  sender.broadcastToOtherPartitions(watermark, ssp, collector);
  assertEquals(partitions.size(), 3);
}
 
Example #30
Source File: DemoTaskFactory.java    From scotty-window-processor with Apache License 2.0
@Override
public StreamTask createInstance() {
    SystemStream stream = new SystemStream(SYSTEM_DESCRIPTOR_NAME, OUTPUT_DESCRIPTOR_NAME);
    KeyedScottyWindowOperator operator = new KeyedScottyWindowOperator<Integer, Integer>
            (new SumWindowFunction(), 100, stream);
    operator.addWindow(new SlidingWindow(WindowMeasure.Time, 5000, 1000));
    operator.addWindow(new TumblingWindow(WindowMeasure.Time, 2000));

    return operator;
}