org.apache.samza.system.SystemStreamPartition Java Examples

The following examples show how to use org.apache.samza.system.SystemStreamPartition. Each example notes the project and source file it was taken from, so you can follow up in the original codebase.
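Before the project examples, here is a minimal sketch of constructing a SystemStreamPartition and reading its fields back. It uses only constructors and accessors that appear in the examples below:

SystemStream systemStream = new SystemStream("kafka", "integers");
// Equivalent shorthand: new SystemStreamPartition("kafka", "integers", new Partition(0))
SystemStreamPartition ssp = new SystemStreamPartition(systemStream, new Partition(0));

String system = ssp.getSystem();                        // "kafka"
String stream = ssp.getStream();                        // "integers"
int partitionId = ssp.getPartition().getPartitionId();  // 0

// SystemStreamPartition implements value equality, which is why the examples
// below freely use it as a key in HashMap and ImmutableMap.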
Example #1
Source File: TestCoordinatorStreamSystemConsumer.java    From samza with Apache License 2.0
@Test
public void testCoordinatorStreamSystemConsumer() {
  Map<String, String> expectedConfig = new LinkedHashMap<String, String>();
  expectedConfig.put("job.id", "1234");
  SystemStream systemStream = new SystemStream("system", "stream");
  MockSystemConsumer systemConsumer = new MockSystemConsumer(new SystemStreamPartition(systemStream, new Partition(0)));
  CoordinatorStreamSystemConsumer consumer = new CoordinatorStreamSystemConsumer(systemStream, systemConsumer, new SinglePartitionWithoutOffsetsSystemAdmin());
  assertEquals(0, systemConsumer.getRegisterCount());
  consumer.register();
  assertEquals(1, systemConsumer.getRegisterCount());
  assertFalse(systemConsumer.isStarted());
  consumer.start();
  assertTrue(systemConsumer.isStarted());
  try {
    consumer.getConfig();
    fail("Should have failed when retrieving config before bootstrapping.");
  } catch (SamzaException e) {
    // Expected.
  }
  consumer.bootstrap();
  assertEquals(expectedConfig, consumer.getConfig());
  assertFalse(systemConsumer.isStopped());
  consumer.stop();
  assertTrue(systemConsumer.isStopped());
}
 
Example #2
Source File: TestWindowOperator.java    From samza with Apache License 2.0
@Before
public void setup() {
  Map<String, String> configMap = new HashMap<>();
  configMap.put("job.default.system", "kafka");
  configMap.put("job.name", "jobName");
  configMap.put("job.id", "jobId");
  this.config = new MapConfig(configMap);

  this.context = new MockContext();
  when(this.context.getJobContext().getConfig()).thenReturn(this.config);
  Serde storeKeySerde = new TimeSeriesKeySerde(new IntegerSerde());
  Serde storeValSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());

  TaskModel taskModel = mock(TaskModel.class);
  when(taskModel.getSystemStreamPartitions()).thenReturn(ImmutableSet
      .of(new SystemStreamPartition("kafka", "integers", new Partition(0))));
  when(taskModel.getTaskName()).thenReturn(new TaskName("task 1"));
  when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
  when(this.context.getTaskContext().getTaskMetricsRegistry()).thenReturn(new MetricsRegistryMap());
  when(this.context.getContainerContext().getContainerMetricsRegistry()).thenReturn(new MetricsRegistryMap());
  when(this.context.getTaskContext().getStore("jobName-jobId-window-w1"))
      .thenReturn(new TestInMemoryStore<>(storeKeySerde, storeValSerde));
}
 
Example #3
Source File: TestTaskPartitionAssignmentManager.java    From samza with Apache License 2.0
@Test
public void testReadPartitionAssignments() {
  SystemStreamPartition testSystemStreamPartition1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, PARTITION);
  List<String> testTaskNames1 = ImmutableList.of("test-task1", "test-task2", "test-task3");
  SystemStreamPartition testSystemStreamPartition2 = new SystemStreamPartition(TEST_SYSTEM, "stream-2", PARTITION);
  List<String> testTaskNames2 = ImmutableList.of("test-task4", "test-task5");
  SystemStreamPartition testSystemStreamPartition3 = new SystemStreamPartition(TEST_SYSTEM, "stream-3", PARTITION);
  List<String> testTaskNames3 = ImmutableList.of("test-task6", "test-task7", "test-task8");

  taskPartitionAssignmentManager.writeTaskPartitionAssignments(
      ImmutableMap.of(testSystemStreamPartition1, testTaskNames1, testSystemStreamPartition2, testTaskNames2,
          testSystemStreamPartition3, testTaskNames3));

  Map<SystemStreamPartition, List<String>> expectedMapping =
      ImmutableMap.of(testSystemStreamPartition1, testTaskNames1, testSystemStreamPartition2, testTaskNames2,
          testSystemStreamPartition3, testTaskNames3);
  Map<SystemStreamPartition, List<String>> actualMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();

  Assert.assertEquals(expectedMapping, actualMapping);
}
 
Example #4
Source File: TestTaskSideInputStorageManager.java    From samza with Apache License 2.0
@Test
public void testStop() {
  final String storeName = "test-stop-store";
  final String taskName = "test-stop-task";
  final SystemStreamPartition ssp = new SystemStreamPartition("test-system", "test-stream", new Partition(0));
  final String offset = "123";
  final ImmutableMap<SystemStreamPartition, String> processedOffsets = ImmutableMap.of(ssp, offset);

  TaskSideInputStorageManager testSideInputStorageManager = new MockTaskSideInputStorageManagerBuilder(taskName, NON_LOGGED_STORE_DIR)
      .addInMemoryStore(storeName, ImmutableSet.of())
      .build();

  initializeSideInputStorageManager(testSideInputStorageManager);
  testSideInputStorageManager.stop(processedOffsets);

  verify(testSideInputStorageManager.getStore(storeName)).stop();
  verify(testSideInputStorageManager).writeFileOffsets(eq(processedOffsets));
}
 
Example #5
Source File: UnboundedSourceSystem.java    From beam with Apache License 2.0
List<IncomingMessageEnvelope> getNextMessages(SystemStreamPartition ssp, long timeoutMillis)
    throws InterruptedException {
  if (lastException != null) {
    throw new RuntimeException(lastException);
  }

  final List<IncomingMessageEnvelope> envelopes = new ArrayList<>();
  final BlockingQueue<IncomingMessageEnvelope> queue = queues.get(ssp);
  final IncomingMessageEnvelope envelope = queue.poll(timeoutMillis, TimeUnit.MILLISECONDS);

  if (envelope != null) {
    envelopes.add(envelope);
    queue.drainTo(envelopes);
  }

  final int numElements =
      (int) envelopes.stream().filter(ev -> (ev.getMessage() instanceof OpMessage)).count();
  available.release(numElements);

  if (lastException != null) {
    throw new RuntimeException(lastException);
  }

  return envelopes;
}
 
Example #6
Source File: TransactionalStateTaskRestoreManager.java    From samza with Apache License 2.0
@Override
public void restore() throws InterruptedException {
  Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;

  for (Map.Entry<String, RestoreOffsets> entry : storesToRestore.entrySet()) {
    String storeName = entry.getKey();
    String endOffset = entry.getValue().endingOffset;
    SystemStream systemStream = storeChangelogs.get(storeName);
    SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(systemStream.getSystem());
    SystemConsumer systemConsumer = storeConsumers.get(storeName);
    SystemStreamPartition changelogSSP = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());

    ChangelogSSPIterator changelogSSPIterator =
        new ChangelogSSPIterator(systemConsumer, changelogSSP, endOffset, systemAdmin, true);
    StorageEngine taskStore = storeEngines.get(storeName);

    LOG.info("Restoring store: {} for task: {}", storeName, taskModel.getTaskName());
    taskStore.restore(changelogSSPIterator);
  }
}
 
Example #7
Source File: TestBlockingEnvelopeMap.java    From samza with Apache License 2.0
@Test
public void testShouldGetSomeMessages() throws InterruptedException {
  BlockingEnvelopeMap map = new MockBlockingEnvelopeMap();
  map.register(SSP, "0");
  map.put(SSP, ENVELOPE);
  Map<SystemStreamPartition, List<IncomingMessageEnvelope>> envelopes = map.poll(FETCH, 0);
  assertEquals(1, envelopes.size());
  assertEquals(1, envelopes.get(SSP).size());
  map.put(SSP, ENVELOPE);
  map.put(SSP, ENVELOPE);
  envelopes = map.poll(FETCH, 0);
  assertEquals(1, envelopes.size());
  assertEquals(2, envelopes.get(SSP).size());

  // Size info.
  assertEquals(0, map.getMessagesSizeInQueue(SSP));
}
 
Example #8
Source File: TestAllSspToSingleTaskGrouper.java    From samza with Apache License 2.0
@Test
public void testLocalStreamGroupedCorrectlyForPassthru() {
  HashSet<SystemStreamPartition> allSSPs = new HashSet<>();
  HashMap<String, String> configMap = new HashMap<>();

  configMap.put("job.coordinator.factory", "org.apache.samza.standalone.PassthroughJobCoordinatorFactory");
  configMap.put("processor.id", "1");
  configMap.put("processor.list", configMap.get("processor.id"));

  Config config = new MapConfig(configMap);

  SystemStreamPartitionGrouper grouper = grouperFactory.getSystemStreamPartitionGrouper(config);

  Collections.addAll(allSSPs, aa0, aa1, aa2, ab0);
  Map<TaskName, Set<SystemStreamPartition>> result = grouper.group(allSSPs);
  Map<TaskName, Set<SystemStreamPartition>> expectedResult = new HashMap<>();

  HashSet<SystemStreamPartition> partitions = new HashSet<>();
  partitions.add(aa0);
  partitions.add(aa1);
  partitions.add(aa2);
  partitions.add(ab0);
  expectedResult.put(new TaskName("Task-1"), partitions);

  assertEquals(expectedResult, result);
}
 
Example #9
Source File: RunLoop.java    From samza with Apache License 2.0
/**
 * Fetch the pending envelope in the pending queue for the task to process.
 * Update the chooser for flow control on the SSP level. Once it's updated, the RunLoop
 * will be able to choose new messages from this SSP for the task to process. Note that we
 * update only when the envelope is processed for the first time. This solves the issue with
 * broadcast streams, where a message needs to be processed by multiple tasks. In that case,
 * the envelope will be in the pendingEnvelopeQueue of each task. Only the first fetch updates
 * the chooser with the next envelope in the broadcast stream partition.
 * This function is called on the run loop thread, so no synchronization is needed.
 * @return the next envelope for the task to process
 */
private IncomingMessageEnvelope fetchEnvelope() {
  PendingEnvelope pendingEnvelope = pendingEnvelopeQueue.remove();
  int queueSize = pendingEnvelopeQueue.size();
  taskMetrics.pendingMessages().set(queueSize);
  log.trace("fetch envelope ssp {} offset {} to process.",
      pendingEnvelope.envelope.getSystemStreamPartition(), pendingEnvelope.envelope.getOffset());
  log.debug("Task {} pending envelopes count is {} after fetching.", taskName, queueSize);

  if (pendingEnvelope.markProcessed()) {
    SystemStreamPartition partition = pendingEnvelope.envelope.getSystemStreamPartition();
    consumerMultiplexer.tryUpdate(partition);
    log.debug("Update chooser for {}", partition);
  }
  return pendingEnvelope.envelope;
}
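The markProcessed() call above is what deduplicates chooser updates for broadcast streams: the same pending envelope sits in every consuming task's queue, but only the first fetch may advance the chooser. RunLoop's actual PendingEnvelope is internal; a minimal sketch of the idea, assuming a single instance is shared across the tasks' queues:

static class PendingEnvelope {
  final IncomingMessageEnvelope envelope;
  private final AtomicBoolean processed = new AtomicBoolean(false);

  PendingEnvelope(IncomingMessageEnvelope envelope) {
    this.envelope = envelope;
  }

  // Returns true for exactly one caller, so only the first fetch updates the chooser.
  boolean markProcessed() {
    return processed.compareAndSet(false, true);
  }
}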
 
Example #10
Source File: ContainerStorageManager.java    From samza with Apache License 2.0
/**
 * For each standby task, we remove its changelog SSPs from the changelog-SSP map and add them to the task's taskSideInputSSPs.
 * The task's sideInputManager will consume and restore these as well.
 *
 * @param containerModel the container's model
 * @param changelogSystemStreams the passed-in map of store name to changelog system-stream
 * @return a map from store name to changelog system-stream for the active tasks, assuming no two stores have the same changelog SSP
 */
private Map<String, SystemStream> getChangelogSystemStreams(ContainerModel containerModel, Map<String, SystemStream> changelogSystemStreams) {

  if (MapUtils.invertMap(changelogSystemStreams).size() != changelogSystemStreams.size()) {
    throw new SamzaException("Two stores cannot have the same changelog system-stream");
  }

  Map<SystemStreamPartition, String> changelogSSPToStore = new HashMap<>();
  changelogSystemStreams.forEach((storeName, systemStream) ->
      containerModel.getTasks().forEach((taskName, taskModel) -> { changelogSSPToStore.put(new SystemStreamPartition(systemStream, taskModel.getChangelogPartition()), storeName); })
  );

  getTasks(containerModel, TaskMode.Standby).forEach((taskName, taskModel) -> {
    this.taskSideInputStoreSSPs.putIfAbsent(taskName, new HashMap<>());
    changelogSystemStreams.forEach((storeName, systemStream) -> {
      SystemStreamPartition ssp = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());
      changelogSSPToStore.remove(ssp);
      this.taskSideInputStoreSSPs.get(taskName).put(storeName, Collections.singleton(ssp));
    });
  });

  // changelogSystemStreams correspond only to active tasks (since those of standby-tasks moved to sideInputs above)
  return MapUtils.invertMap(changelogSSPToStore).entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, x -> x.getValue().getSystemStream()));
}
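The size check at the top of this method works because MapUtils.invertMap keys the result by value, silently collapsing duplicates. A small illustration of why two stores sharing a changelog trip the guard (store and stream names here are made up):

Map<String, SystemStream> changelogs = ImmutableMap.of(
    "store-a", new SystemStream("kafka", "shared-changelog"),
    "store-b", new SystemStream("kafka", "shared-changelog"));
// Both entries map to the same value, so inversion collapses them into one key:
// invertMap(changelogs).size() == 1, while changelogs.size() == 2 -> SamzaException.
MapUtils.invertMap(changelogs);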
 
Example #11
Source File: BoundedSourceSystem.java    From beam with Apache License 2.0
private List<IncomingMessageEnvelope> getNextMessages(
    SystemStreamPartition ssp, long timeoutMillis) throws InterruptedException {
  if (lastException != null) {
    throw new RuntimeException(lastException);
  }

  final List<IncomingMessageEnvelope> envelopes = new ArrayList<>();
  final BlockingQueue<IncomingMessageEnvelope> queue = queues.get(ssp);
  final IncomingMessageEnvelope envelope = queue.poll(timeoutMillis, TimeUnit.MILLISECONDS);

  if (envelope != null) {
    envelopes.add(envelope);
    queue.drainTo(envelopes);
  }

  available.release(envelopes.size());

  if (lastException != null) {
    throw new RuntimeException(lastException);
  }

  return envelopes;
}
 
Example #12
Source File: TestKafkaSystemAdminWithMock.java    From samza with Apache License 2.0
@Test
public void testGetSSPMetadataWithRetry() {
  SystemStreamPartition oneSSP = new SystemStreamPartition(TEST_SYSTEM, VALID_TOPIC, new Partition(0));
  SystemStreamPartition otherSSP = new SystemStreamPartition(TEST_SYSTEM, "otherTopic", new Partition(1));
  ImmutableSet<SystemStreamPartition> ssps = ImmutableSet.of(oneSSP, otherSSP);
  List<TopicPartition> topicPartitions = ssps.stream()
      .map(ssp -> new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()))
      .collect(Collectors.toList());
  Map<TopicPartition, Long> testBeginningOffsets =
      ImmutableMap.of(testTopicPartition0, KAFKA_BEGINNING_OFFSET_FOR_PARTITION0, testTopicPartition1,
          KAFKA_BEGINNING_OFFSET_FOR_PARTITION1);

  when(mockKafkaConsumer.beginningOffsets(topicPartitions)).thenThrow(new RuntimeException())
      .thenReturn(testBeginningOffsets);
  Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> sspMetadata =
      kafkaSystemAdmin.getSSPMetadata(ssps, new ExponentialSleepStrategy(2,
          1, 1));

  assertEquals("metadata should return for 2 topics", sspMetadata.size(), 2);

  // beginningOffsets is called twice: the first attempt throws and the retry succeeds
  Mockito.verify(mockKafkaConsumer, Mockito.times(2)).beginningOffsets(topicPartitions);
}
 
Example #13
Source File: TestKafkaSystemAdminWithMock.java    From samza with Apache License 2.0
@Test(expected = SamzaException.class)
public void testGetSSPMetadataShouldTerminateAfterFiniteRetriesOnException() throws Exception {
  SystemStreamPartition oneSSP = new SystemStreamPartition(TEST_SYSTEM, VALID_TOPIC, new Partition(0));
  SystemStreamPartition otherSSP = new SystemStreamPartition(TEST_SYSTEM, "otherTopic", new Partition(1));

  ImmutableSet<SystemStreamPartition> ssps = ImmutableSet.of(oneSSP, otherSSP);
  List<TopicPartition> topicPartitions = ssps.stream()
      .map(ssp -> new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()))
      .collect(Collectors.toList());

  when(mockKafkaConsumer.beginningOffsets(topicPartitions)).thenThrow(new RuntimeException())
      .thenThrow(new RuntimeException());

  kafkaSystemAdmin.getSSPMetadata(ssps, new ExponentialSleepStrategy(2,
      1, 1));
}
 
Example #14
Source File: WatermarkStates.java    From samza with Apache License 2.0
WatermarkStates(
    Set<SystemStreamPartition> ssps,
    Map<SystemStream, Integer> producerTaskCounts,
    MetricsRegistry metricsRegistry) {
  final Map<SystemStreamPartition, WatermarkState> states = new HashMap<>();
  final List<SystemStreamPartition> intSsps = new ArrayList<>();

  ssps.forEach(ssp -> {
    final int producerCount = producerTaskCounts.getOrDefault(ssp.getSystemStream(), 0);
    states.put(ssp, new WatermarkState(producerCount));
    if (producerCount != 0) {
      intSsps.add(ssp);
    }
  });
  this.watermarkStates = Collections.unmodifiableMap(states);
  this.watermarkMetrics = new WatermarkMetrics(metricsRegistry);
  this.intermediateSsps = Collections.unmodifiableList(intSsps);
}
 
Example #15
Source File: StorageManagerUtil.java    From samza with Apache License 2.0
/**
 * Read and return the offsets from the directory's offset file.
 *
 * @param storagePartitionDir the base directory of the store
 * @param storeSSPs SSPs associated with the store (if any)
 * @param isSideInput true if the store is a side-input store, false otherwise
 * @return the contents of the offset file if one exists for the store, an empty map otherwise.
 */
public Map<SystemStreamPartition, String> readOffsetFile(File storagePartitionDir, Set<SystemStreamPartition> storeSSPs, boolean isSideInput) {

  File offsetFileRefNew = new File(storagePartitionDir, OFFSET_FILE_NAME_NEW);
  File offsetFileRefLegacy = new File(storagePartitionDir, OFFSET_FILE_NAME_LEGACY);
  File sideInputOffsetFileRefLegacy = new File(storagePartitionDir, SIDE_INPUT_OFFSET_FILE_NAME_LEGACY);

  // First, check whether the new offset file exists; if it does, read offsets from it regardless of old or new format.
  // If it doesn't and the store is not a side-input store, fall back to the legacy offset file (again regardless of
  // offset format). Last, if the store is a side-input store, check for the legacy side-input offset file.
  if (offsetFileRefNew.exists()) {
    return readOffsetFile(storagePartitionDir, offsetFileRefNew.getName(), storeSSPs);
  } else if (!isSideInput && offsetFileRefLegacy.exists()) {
    return readOffsetFile(storagePartitionDir, offsetFileRefLegacy.getName(), storeSSPs);
  } else if (isSideInput && sideInputOffsetFileRefLegacy.exists()) {
    return readOffsetFile(storagePartitionDir, sideInputOffsetFileRefLegacy.getName(), storeSSPs);
  } else {
    return new HashMap<>();
  }
}
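A hypothetical call site for the method above, assuming a store directory on local disk (the path, system, and stream names are illustrative):

File storeDir = new File("/tmp/state/test-store/Partition_0");
SystemStreamPartition ssp =
    new SystemStreamPartition("kafka", "test-store-changelog", new Partition(0));
Map<SystemStreamPartition, String> offsets =
    new StorageManagerUtil().readOffsetFile(storeDir, ImmutableSet.of(ssp), false /* not a side input */);
// offsets is empty (never null) when no offset file exists in any of the three formats.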
 
Example #16
Source File: TestInMemorySystem.java    From samza with Apache License 2.0
private List<IncomingMessageEnvelope> consumeRawMessages(SystemConsumer consumer, Set<SystemStreamPartition> sspsToPoll) {
  try {
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> results = consumer.poll(sspsToPoll, POLL_TIMEOUT_MS);

    return results.entrySet()
        .stream()
        .filter(entry -> entry.getValue().size() != 0)
        .map(Map.Entry::getValue)
        .flatMap(List::stream)
        .collect(Collectors.toList());
  } catch (Exception e) {
    fail("Unable to consume messages");
  }

  return new ArrayList<>();
}
 
Example #17
Source File: TestKinesisSystemConsumer.java    From samza with Apache License 2.0
@SuppressWarnings("unchecked")
private boolean isSspAvailable(KinesisSystemConsumer consumer, SystemStreamPartition ssp)
    throws NoSuchFieldException, IllegalAccessException {
  SSPAllocator sspAllocator = getSspAllocator(consumer);
  Field f = sspAllocator.getClass().getDeclaredField("availableSsps");
  f.setAccessible(true);
  Map<String, Set<SystemStreamPartition>> availableSsps = (Map<String, Set<SystemStreamPartition>>) f.get(
      sspAllocator);
  return availableSsps.containsKey(ssp.getStream()) && availableSsps.get(ssp.getStream()).contains(ssp);
}
 
Example #18
Source File: TestEventHubSystemAdmin.java    From samza with Apache License 2.0
@Test
public void testStartpointResolverShouldResolveTheStartpointTimestampToCorrectOffset() throws EventHubException {
  // Initialize variables required for testing.
  EventHubSystemAdmin mockEventHubSystemAdmin = Mockito.mock(EventHubSystemAdmin.class);
  EventHubConfig eventHubConfig = Mockito.mock(EventHubConfig.class);
  SystemStreamPartition systemStreamPartition = new SystemStreamPartition("test-system", "test-stream", new Partition(0));
  String mockedOffsetToReturn = "100";

  // Setup the mock variables.
  EventHubClientManager mockEventHubClientManager = Mockito.mock(EventHubClientManager.class);
  EventHubClient mockEventHubClient = Mockito.mock(EventHubClient.class);
  PartitionReceiver mockPartitionReceiver = Mockito.mock(PartitionReceiver.class);
  EventData mockEventData = Mockito.mock(EventData.class);
  EventData.SystemProperties mockSystemProperties = Mockito.mock(EventData.SystemProperties.class);

  // Configure the mock variables to return the appropriate values.
  Mockito.when(mockEventHubSystemAdmin.getOrCreateStreamEventHubClient("test-stream")).thenReturn(mockEventHubClientManager);
  Mockito.when(mockEventHubClientManager.getEventHubClient()).thenReturn(mockEventHubClient);
  Mockito.when(mockEventHubClient.createReceiverSync(Mockito.anyString(), Mockito.anyString(), Mockito.any())).thenReturn(mockPartitionReceiver);
  Mockito.when(mockPartitionReceiver.receiveSync(1)).thenReturn(Arrays.asList(mockEventData));
  Mockito.when(mockEventData.getSystemProperties()).thenReturn(mockSystemProperties);
  Mockito.when(mockSystemProperties.getOffset()).thenReturn(mockedOffsetToReturn);

  // Test the Offset resolver.
  EventHubSamzaOffsetResolver resolver = new EventHubSamzaOffsetResolver(mockEventHubSystemAdmin, eventHubConfig);
  String resolvedOffset = resolver.visit(systemStreamPartition, new StartpointTimestamp(100L));
  Assert.assertEquals(mockedOffsetToReturn, resolvedOffset);
}
 
Example #19
Source File: KafkaSystemAdmin.java    From samza with Apache License 2.0
/**
 * Determines the end offset of the given {@code SystemStreamPartition}.
 * @param systemStreamPartition represents the system stream partition.
 * @return the end offset of the partition.
 */
private String getEndOffset(SystemStreamPartition systemStreamPartition) {
  TopicPartition topicPartition = toTopicPartition(systemStreamPartition);
  Map<TopicPartition, Long> topicPartitionToOffsets = threadSafeKafkaConsumer.execute(consumer -> consumer.endOffsets(ImmutableSet.of(topicPartition)));
  Long endOffset = topicPartitionToOffsets.get(topicPartition);
  LOG.info("End offset for topic partition: {} is {}.", topicPartition, endOffset);
  return String.valueOf(endOffset);
}
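For reference, the toTopicPartition conversion used above follows the same SSP-to-Kafka mapping visible in Examples #12 and #13; a minimal sketch:

private static TopicPartition toTopicPartition(SystemStreamPartition ssp) {
  // The Samza stream name is the Kafka topic; the Samza partition id is the Kafka partition.
  return new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId());
}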
 
Example #20
Source File: TestWindowOperator.java    From samza with Apache License 2.0
@Test
public void testEndOfStreamFlushesWithEarlyTriggerFirings() throws Exception {

  OperatorSpecGraph sgb = this.getTumblingWindowStreamGraph(AccumulationMode.DISCARDING,
      Duration.ofSeconds(1), Triggers.repeat(Triggers.count(2))).getOperatorSpecGraph();
  List<WindowPane<Integer, Collection<IntegerEnvelope>>> windowPanes = new ArrayList<>();

  TestClock testClock = new TestClock();
  StreamOperatorTask task = new StreamOperatorTask(sgb, testClock);
  task.init(this.context);

  MessageCollector messageCollector =
    envelope -> windowPanes.add((WindowPane<Integer, Collection<IntegerEnvelope>>) envelope.getMessage());
  Assert.assertEquals(windowPanes.size(), 0);

  List<Integer> integerList = ImmutableList.of(1, 2, 1, 2, 1);
  integerList.forEach(n -> task.processAsync(new IntegerEnvelope(n), messageCollector, taskCoordinator, taskCallback));

  // early triggers should emit (1,2) and (1,2) in the same window.
  Assert.assertEquals(windowPanes.size(), 2);

  testClock.advanceTime(Duration.ofSeconds(1));
  Assert.assertEquals(windowPanes.size(), 2);

  final IncomingMessageEnvelope endOfStream = IncomingMessageEnvelope.buildEndOfStreamEnvelope(
      new SystemStreamPartition("kafka", "integers", new Partition(0)));
  task.processAsync(endOfStream, messageCollector, taskCoordinator, taskCallback);

  // end of stream flushes the last entry (1)
  Assert.assertEquals(windowPanes.size(), 3);
  Assert.assertEquals((windowPanes.get(0).getMessage()).size(), 2);
  verify(taskCoordinator, times(1)).commit(TaskCoordinator.RequestScope.CURRENT_TASK);
  verify(taskCoordinator, times(1)).shutdown(TaskCoordinator.RequestScope.CURRENT_TASK);
}
 
Example #21
Source File: RunLoop.java    From samza with Apache License 2.0
AsyncTaskState(TaskName taskName, TaskInstanceMetrics taskMetrics, Set<SystemStreamPartition> sspSet, boolean hasIntermediateStreams) {
  this.taskName = taskName;
  this.taskMetrics = taskMetrics;
  this.pendingEnvelopeQueue = new ArrayDeque<>();
  this.processingSspSet = sspSet;
  this.hasIntermediateStreams = hasIntermediateStreams;
}
 
Example #22
Source File: TestStartpoint.java    From samza with Apache License 2.0
@Test
public void testStartpointLatest() {
  StartpointUpcoming startpoint = new StartpointUpcoming();
  Assert.assertTrue(startpoint.getCreationTimestamp() <= Instant.now().toEpochMilli());

  MockStartpointVisitor mockStartpointVisitorConsumer = new MockStartpointVisitor();
  startpoint.apply(new SystemStreamPartition("sys", "stream", new Partition(1)), mockStartpointVisitorConsumer);
  Assert.assertEquals(StartpointUpcoming.class, mockStartpointVisitorConsumer.visitedClass);
}
 
Example #23
Source File: InMemoryManager.java    From samza with Apache License 2.0
/**
 * Populate the metadata for the {@link SystemStream} and initialize the buffer for {@link SystemStreamPartition}.
 *
 * @param streamSpec stream spec for the stream to be initialized
 *
 * @return true if successful, false otherwise
 */
boolean initializeStream(StreamSpec streamSpec) {
  LOG.info("Initializing the stream for {}", streamSpec.getId());
  systemStreamToPartitions.put(streamSpec.toSystemStream(), streamSpec.getPartitionCount());

  for (int partition = 0; partition < streamSpec.getPartitionCount(); partition++) {
    bufferedMessages.put(
        new SystemStreamPartition(streamSpec.toSystemStream(), new Partition(partition)),
        newSynchronizedLinkedList());
  }

  return true;
}
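A hypothetical caller of initializeStream, assuming a StreamSpec built from an id, physical stream name, system name, and partition count (all values here are made up, as is the no-arg InMemoryManager construction):

InMemoryManager manager = new InMemoryManager();
StreamSpec spec = new StreamSpec("input-id", "input", "in-memory", 4);
manager.initializeStream(spec);
// bufferedMessages now holds an empty synchronized list for partitions 0 through 3.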
 
Example #24
Source File: TestTransactionalStateTaskStorageManager.java    From samza with Apache License 2.0
@Test(expected = SamzaException.class)
public void testCheckpointFailsIfErrorWritingOffsetFiles() {
  ContainerStorageManager csm = mock(ContainerStorageManager.class);

  StorageEngine mockLPStore = mock(StorageEngine.class);
  StoreProperties lpStoreProps = mock(StoreProperties.class);
  when(mockLPStore.getStoreProperties()).thenReturn(lpStoreProps);
  when(lpStoreProps.isPersistedToDisk()).thenReturn(true);
  when(lpStoreProps.isLoggedStore()).thenReturn(true);
  Path mockPath = mock(Path.class);
  when(mockLPStore.checkpoint(any())).thenReturn(Optional.of(mockPath));
  java.util.Map<String, StorageEngine> taskStores =
      ImmutableMap.of("loggedPersistentStore", mockLPStore);
  when(csm.getAllStores(any())).thenReturn(taskStores);

  TransactionalStateTaskStorageManager tsm = spy(buildTSM(csm, mock(Partition.class), new StorageManagerUtil()));
  doThrow(new SamzaException("Error writing offset file"))
      .when(tsm).writeChangelogOffsetFiles(any(), any(), any());

  Map<SystemStreamPartition, Option<String>> offsets = ScalaJavaUtil.toScalaMap(
      ImmutableMap.of(mock(SystemStreamPartition.class), Option.apply("1")));

  // invoke checkpoint
  tsm.checkpoint(CheckpointId.create(), offsets);

  fail("Should have thrown an exception if error writing offset file.");
}
 
Example #25
Source File: HdfsSystemConsumer.java    From samza with Apache License 2.0
private void doPoll(MultiFileHdfsReader reader) {
  SystemStreamPartition systemStreamPartition = reader.getSystemStreamPartition();
  while (reader.hasNext() && !isShutdown) {
    IncomingMessageEnvelope messageEnvelope = reader.readNext();
    offerMessage(systemStreamPartition, messageEnvelope);
    consumerMetrics.incNumEvents(systemStreamPartition);
    consumerMetrics.incTotalNumEvents();
  }
  offerMessage(systemStreamPartition, IncomingMessageEnvelope.buildEndOfStreamEnvelope(systemStreamPartition));
  reader.close();
}
 
Example #26
Source File: TestKafkaSystemConsumer.java    From samza with Apache License 2.0
@Test
public void testFetchThresholdBytes() {

  SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
  SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
  int partitionsNum = 2;
  int ime0Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum; // fake size
  int ime1Size = Integer.valueOf(FETCH_THRESHOLD_MSGS) / partitionsNum - 1; // fake size
  int ime11Size = 20;
  ByteArraySerializer bytesSerde = new ByteArraySerializer();
  IncomingMessageEnvelope ime0 = new IncomingMessageEnvelope(ssp0, "0", bytesSerde.serialize("", "key0".getBytes()),
      bytesSerde.serialize("", "value0".getBytes()), ime0Size);
  IncomingMessageEnvelope ime1 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key1".getBytes()),
      bytesSerde.serialize("", "value1".getBytes()), ime1Size);
  IncomingMessageEnvelope ime11 = new IncomingMessageEnvelope(ssp1, "0", bytesSerde.serialize("", "key11".getBytes()),
      bytesSerde.serialize("", "value11".getBytes()), ime11Size);
  KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);

  consumer.register(ssp0, "0");
  consumer.register(ssp1, "0");
  consumer.start();
  consumer.messageSink.addMessage(ssp0, ime0);
  // queue for ssp0 should be full now, because we added a message of size FETCH_THRESHOLD_MSGS/partitionsNum
  Assert.assertFalse(consumer.messageSink.needsMoreMessages(ssp0));
  consumer.messageSink.addMessage(ssp1, ime1);
  // queue for ssp1 should be less than full now, because we added a message of size (FETCH_THRESHOLD_MSGS/partitionsNum - 1)
  Assert.assertTrue(consumer.messageSink.needsMoreMessages(ssp1));
  consumer.messageSink.addMessage(ssp1, ime11);
  // queue for ssp1 should be full now, because we added a message of size 20 on top
  Assert.assertFalse(consumer.messageSink.needsMoreMessages(ssp1));

  Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
  Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
  Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
  Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));

  consumer.stop();
}
 
Example #27
Source File: KinesisIncomingMessageEnvelope.java    From samza with Apache License 2.0
public KinesisIncomingMessageEnvelope(SystemStreamPartition systemStreamPartition, String offset, Object key,
    Object message, String shardId, String sequenceNumber, Date approximateArrivalTimestamp) {
  super(systemStreamPartition, offset, key, message);
  this.shardId = shardId;
  this.sequenceNumber = sequenceNumber;
  this.approximateArrivalTimestamp = approximateArrivalTimestamp;
}
 
Example #28
Source File: KinesisSystemConsumer.java    From samza with Apache License 2.0
@Override
protected void put(SystemStreamPartition ssp, IncomingMessageEnvelope envelope) {
  try {
    super.put(ssp, envelope);
  } catch (Exception e) {
    LOG.error("Exception while putting record. Shutting down SystemStream {}", ssp.getSystemStream(), e);
    Thread.currentThread().interrupt();
  }
}
 
Example #29
Source File: AzureCheckpointManager.java    From samza with Apache License 2.0
private String serializeSystemStreamPartition(SystemStreamPartition ssp) {
  // Create the Json string for SystemStreamPartition
  Map<String, String> sspMap = new HashMap<>();

  sspMap.put(SYSTEM_PROP_NAME, ssp.getSystem());
  sspMap.put(STREAM_PROP_NAME, ssp.getStream());
  sspMap.put(PARTITION_PROP_NAME, String.valueOf(ssp.getPartition().getPartitionId()));

  return new String(jsonSerde.toBytes(sspMap));
}
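Example #30 below calls deserializeSystemStreamPartition, which is not shown on this page. A plausible inverse of the serializer above, assuming jsonSerde.fromBytes returns the Map<String, String> that toBytes wrote:

private SystemStreamPartition deserializeSystemStreamPartition(String serializedSSP) {
  // Hypothetical sketch: reuse the same SYSTEM/STREAM/PARTITION property names.
  Map<String, String> sspMap = jsonSerde.fromBytes(serializedSSP.getBytes());
  return new SystemStreamPartition(
      sspMap.get(SYSTEM_PROP_NAME),
      sspMap.get(STREAM_PROP_NAME),
      new Partition(Integer.parseInt(sspMap.get(PARTITION_PROP_NAME))));
}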
 
Example #30
Source File: AzureCheckpointManager.java    From samza with Apache License 2.0
@Override
public Checkpoint readLastCheckpoint(TaskName taskName) {
  if (!taskNames.contains(taskName)) {
    throw new SamzaException("reading checkpoint of unregistered/unwritten task");
  }

  // Create the query for taskName
  String partitionQueryKey = taskName.toString();
  String partitionFilter = TableQuery.generateFilterCondition(
          PARTITION_KEY,
          TableQuery.QueryComparisons.EQUAL,
          partitionQueryKey);
  TableQuery<TaskCheckpointEntity> query = TableQuery.from(TaskCheckpointEntity.class).where(partitionFilter);

  ImmutableMap.Builder<SystemStreamPartition, String> builder = ImmutableMap.builder();
  try {
    for (TaskCheckpointEntity taskCheckpointEntity : cloudTable.execute(query)) {
      // Recreate the SSP offset
      String serializedSSP = taskCheckpointEntity.getRowKey();
      builder.put(deserializeSystemStreamPartition(serializedSSP), taskCheckpointEntity.getOffset());
    }

  } catch (NoSuchElementException e) {
    LOG.warn("No checkpoints found found for registered taskName={}", taskName);
    // Return null if not entity elements are not found
    return null;
  }
  LOG.debug("Received checkpoint state for taskName={}", taskName);
  return new Checkpoint(builder.build());
}