org.apache.samza.system.IncomingMessageEnvelope Java Examples

The following examples show how to use org.apache.samza.system.IncomingMessageEnvelope. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestInMemorySystem.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Polls {@code consumer} once for the given partitions and flattens every returned envelope
 * into a single list.
 *
 * @param consumer consumer to poll
 * @param sspsToPoll partitions to request messages for
 * @return all envelopes returned by the poll, in the iteration order of the poll result; may be empty
 * @throws AssertionError if polling or flattening fails; the original exception is attached as the cause
 */
private List<IncomingMessageEnvelope> consumeRawMessages(SystemConsumer consumer, Set<SystemStreamPartition> sspsToPoll) {
  try {
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> results = consumer.poll(sspsToPoll, POLL_TIMEOUT_MS);

    // Empty lists contribute nothing to the flattened stream, so no explicit filtering is needed.
    return results.values()
        .stream()
        .flatMap(List::stream)
        .collect(Collectors.toList());
  } catch (Exception e) {
    // Fail the test but keep the underlying exception so the root cause appears in the report.
    throw new AssertionError("Unable to consume messages", e);
  }
}
 
Example #2
Source File: TestKafkaCheckpointManagerJava.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the last checkpoint through a manager wired with serdes that throw, and expects the
 * read to complete without propagating an exception.
 */
@Test
public void testReadSucceedsOnKeySerdeExceptionsWhenValidationIsDisabled() throws Exception {
  // config stub that only answers the SSP grouper factory lookup
  Config config = mock(Config.class);
  when(config.get(JobConfig.SSP_GROUPER_FACTORY)).thenReturn(GROUPER_FACTORY_CLASS);

  KafkaStreamSpec spec = new KafkaStreamSpec(CHECKPOINT_TOPIC, CHECKPOINT_TOPIC, CHECKPOINT_SYSTEM, 1);

  // consumer stub that hands back a single checkpoint envelope
  SystemStreamPartition inputSsp = new SystemStreamPartition("system-1", "input-topic", new Partition(0));
  List<List<IncomingMessageEnvelope>> envelopeBatches = ImmutableList.of(
      ImmutableList.of(newCheckpointEnvelope(TASK1, inputSsp, "0")));
  SystemConsumer consumer = newConsumer(envelopeBatches);

  SystemAdmin admin = newAdmin("0", "1");
  SystemFactory systemFactory = newFactory(mock(SystemProducer.class), consumer, admin);

  // build the checkpoint manager around serdes that always throw
  KafkaCheckpointManager manager = new KafkaCheckpointManager(
      spec,
      systemFactory,
      false,
      config,
      mock(MetricsRegistry.class),
      new ExceptionThrowingCheckpointSerde(),
      new ExceptionThrowingCheckpointKeySerde());
  manager.register(TASK1);
  manager.start();

  // the read must succeed in spite of the exceptions raised by the throwing serdes
  manager.readLastCheckpoint(TASK1);
}
 
Example #3
Source File: TestAvroSystemFactory.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Produces the next batch of {@code numMessages / 4} envelopes for each requested partition.
 * Indices below {@code numMessages} yield data envelopes; indices at or beyond it yield
 * end-of-stream envelopes. The per-partition cursor is advanced by the batch size, and the
 * call optionally sleeps for {@code sleepBetweenPollsMs} before returning.
 *
 * @param set partitions to generate envelopes for
 * @param timeout unused by this implementation
 * @return generated envelopes keyed by partition
 * @throws InterruptedException if interrupted while sleeping between polls
 */
@Override
public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(Set<SystemStreamPartition> set, long timeout)
    throws InterruptedException {
  final Map<SystemStreamPartition, List<IncomingMessageEnvelope>> envelopeMap = new HashMap<>();

  for (SystemStreamPartition ssp : set) {
    final int firstIndex = curMessagesPerSsp.get(ssp);
    final int endIndex = firstIndex + numMessages / 4;

    // Emit the data messages first; once they run out, emit end-of-stream markers.
    List<IncomingMessageEnvelope> batch = IntStream.range(firstIndex, endIndex)
        .mapToObj(index -> {
          if (index < numMessages) {
            return new IncomingMessageEnvelope(ssp, null, getKey(index, ssp), getData(index, ssp));
          }
          return IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp);
        })
        .collect(Collectors.toList());

    envelopeMap.put(ssp, batch);
    curMessagesPerSsp.put(ssp, endIndex);
  }

  if (sleepBetweenPollsMs > 0) {
    Thread.sleep(sleepBetweenPollsMs);
  }

  return envelopeMap;
}
 
Example #4
Source File: BoundedSourceSystemTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Repeatedly polls {@code consumer} for {@code ssp} until either the most recently received
 * envelope is an end-of-stream envelope or {@code timeoutMillis} has elapsed, and returns
 * everything received so far.
 *
 * @param consumer consumer to poll
 * @param ssp partition to read from
 * @param timeoutMillis overall time budget in milliseconds; must be {@code >= 0}
 * @return the envelopes accumulated across all polls, possibly empty
 * @throws InterruptedException if a poll is interrupted
 */
private static List<IncomingMessageEnvelope> consumeUntilTimeoutOrEos(
    SystemConsumer consumer, SystemStreamPartition ssp, long timeoutMillis)
    throws InterruptedException {
  assertTrue("Expected timeoutMillis (" + timeoutMillis + ") >= 0", timeoutMillis >= 0);

  final List<IncomingMessageEnvelope> accumulator = new ArrayList<>();
  final long start = System.currentTimeMillis();
  long now = start;
  while (timeoutMillis + start >= now) {
    // Remaining time budget; the loop condition guarantees it is never negative here.
    // (The operands were previously subtracted the other way round, which always produced a
    // non-positive timeout.) pollOnce is assumed to forward this value as the poll timeout.
    final long remainingMillis = timeoutMillis + start - now;
    accumulator.addAll(pollOnce(consumer, ssp, remainingMillis));
    if (!accumulator.isEmpty() && accumulator.get(accumulator.size() - 1).isEndOfStream()) {
      break;
    }
    now = System.currentTimeMillis();
  }
  return accumulator;
}
 
Example #5
Source File: TaskSideInputHandler.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the side input processor of every store mapped to the envelope's SSP, applies the
 * resulting entries to that store, and finally records the envelope's offset as the last
 * processed offset for the SSP.
 * Synchronized in order to be mutually exclusive with flush().
 *
 * @param envelope incoming envelope to be processed
 */
public synchronized void process(IncomingMessageEnvelope envelope) {
  final SystemStreamPartition ssp = envelope.getSystemStreamPartition();
  final String offset = envelope.getOffset();

  for (String storeName : this.sspToStores.get(ssp)) {
    SideInputsProcessor processor = this.storeToProcessor.get(storeName);
    KeyValueStore targetStore = (KeyValueStore) this.taskSideInputStorageManager.getStore(storeName);
    Collection<Entry<?, ?>> pendingWrites = processor.process(envelope, targetStore);

    // TODO: SAMZA-2255: optimize writes to side input stores
    for (Entry pending : pendingWrites) {
      // Entries without a key are skipped entirely.
      if (pending.getKey() == null) {
        continue;
      }
      // A null value means "delete the key"; anything else is written as-is.
      if (pending.getValue() == null) {
        targetStore.delete(pending.getKey());
      } else {
        targetStore.put(pending.getKey(), pending.getValue());
      }
    }
  }

  this.lastProcessedOffsets.put(ssp, offset);
}
 
Example #6
Source File: TestTaskCallbackImpl.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Resets the counters and the captured throwable, installs a listener that records
 * completions and failures, and creates the callback under test around a mocked envelope.
 */
@Before
public void setup() {
  throwable = null;
  failureCount = new AtomicInteger(0);
  completeCount = new AtomicInteger(0);

  listener = new TaskCallbackListener() {

    /** Counts every completed callback. */
    @Override
    public void onComplete(TaskCallback completed) {
      completeCount.incrementAndGet();
    }

    /** Stores the reported cause and counts the failure. */
    @Override
    public void onFailure(TaskCallback failed, Throwable cause) {
      throwable = cause;
      failureCount.incrementAndGet();
    }
  };

  callback = new TaskCallbackImpl(listener, null, mock(IncomingMessageEnvelope.class), null, 0L, 0L);
}
 
Example #7
Source File: UnboundedSourceSystemTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Repeatedly polls {@code consumer} for {@code ssp} until either the most recently received
 * envelope carries a watermark message or {@code timeoutMillis} has elapsed, and returns
 * everything received so far.
 *
 * @param consumer consumer to poll
 * @param ssp partition to read from
 * @param timeoutMillis overall time budget in milliseconds; must be {@code >= 0}
 * @return the envelopes accumulated across all polls, possibly empty
 * @throws InterruptedException if a poll is interrupted
 */
private static List<IncomingMessageEnvelope> consumeUntilTimeoutOrWatermark(
    SystemConsumer consumer, SystemStreamPartition ssp, long timeoutMillis)
    throws InterruptedException {
  assertTrue("Expected timeoutMillis (" + timeoutMillis + ") >= 0", timeoutMillis >= 0);

  final List<IncomingMessageEnvelope> accumulator = new ArrayList<>();
  final long start = System.currentTimeMillis();
  long now = start;
  while (timeoutMillis + start >= now) {
    // Remaining time budget; the loop condition guarantees it is never negative here.
    // (The operands were previously subtracted the other way round, which always produced a
    // non-positive timeout.) pollOnce is assumed to forward this value as the poll timeout.
    final long remainingMillis = timeoutMillis + start - now;
    accumulator.addAll(pollOnce(consumer, ssp, remainingMillis));
    if (!accumulator.isEmpty()
        && MessageType.of(accumulator.get(accumulator.size() - 1).getMessage())
            == MessageType.WATERMARK) {
      break;
    }
    now = System.currentTimeMillis();
  }
  return accumulator;
}
 
Example #8
Source File: HdfsSystemAdmin.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Compares two multi-file style offsets. Such an offset combines the index of a file with
 * the offset inside that file, formatted as {@code "fileIndex:offsetWithinFile"},
 * for example {@code "2:0"} or {@code "3:127"}.
 * The format of the offset within a file is defined by the implementation of
 * {@link org.apache.samza.system.hdfs.reader.SingleFileHdfsReader} itself.
 *
 * @param offset1 First offset for comparison.
 * @param offset2 Second offset for comparison.
 * @return -1, if offset1 &lt; offset2
 *          0, if offset1 == offset2
 *          1, if offset1 &gt; offset2
 *          null, if not comparable
 */
@Override
public Integer offsetComparator(String offset1, String offset2) {
  if (StringUtils.isBlank(offset1) || StringUtils.isBlank(offset2)) {
    return null;
  }

  // END_OF_STREAM equals itself and is greater than every other offset.
  final boolean firstIsEndOfStream = offset1.equals(IncomingMessageEnvelope.END_OF_STREAM_OFFSET);
  final boolean secondIsEndOfStream = offset2.equals(IncomingMessageEnvelope.END_OF_STREAM_OFFSET);
  if (firstIsEndOfStream) {
    return secondIsEndOfStream ? 0 : 1;
  }
  if (secondIsEndOfStream) {
    return -1;
  }

  // Different files: the file index alone decides the ordering.
  final int firstFileIndex = MultiFileHdfsReader.getCurFileIndex(offset1);
  final int secondFileIndex = MultiFileHdfsReader.getCurFileIndex(offset2);
  if (firstFileIndex != secondFileIndex) {
    return Integer.compare(firstFileIndex, secondFileIndex);
  }

  // Same file: delegate to the reader-type specific comparison of the in-file offsets.
  return HdfsReaderFactory.offsetComparator(
      readerType,
      MultiFileHdfsReader.getCurSingleFileOffset(offset1),
      MultiFileHdfsReader.getCurSingleFileOffset(offset2));
}
 
Example #9
Source File: RunLoop.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Removes the head of the pending envelope queue and hands its envelope to the task.
 * When this is the first time the envelope is marked as processed, the chooser is updated for
 * the envelope's SSP so the RunLoop can pick new messages from that SSP. Updating only on the
 * first fetch covers broadcast streams, where the same envelope sits in the
 * pendingEnvelopeQueue of several tasks: only the first fetch advances the chooser for the
 * broadcast stream partition.
 * The function is called in the run loop thread, so it does no synchronization.
 *
 * @return the envelope that was at the head of the pending queue
 */
private IncomingMessageEnvelope fetchEnvelope() {
  final PendingEnvelope head = pendingEnvelopeQueue.remove();
  final int remaining = pendingEnvelopeQueue.size();
  taskMetrics.pendingMessages().set(remaining);

  log.trace("fetch envelope ssp {} offset {} to process.",
      head.envelope.getSystemStreamPartition(), head.envelope.getOffset());
  log.debug("Task {} pending envelopes count is {} after fetching.", taskName, remaining);

  if (!head.markProcessed()) {
    // Not the first fetch of this envelope; the chooser is left alone.
    return head.envelope;
  }

  final SystemStreamPartition ssp = head.envelope.getSystemStreamPartition();
  consumerMultiplexer.tryUpdate(ssp);
  log.debug("Update chooser for {}", ssp);
  return head.envelope;
}
 
Example #10
Source File: TestCoordinatorStreamSystemConsumer.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Test double for poll: asserts that exactly the expected partition is requested and, on the
 * first invocation only, returns two set-config envelopes followed by a delete envelope.
 * Every later invocation returns an empty map.
 *
 * @param systemStreamPartitions partitions being polled; must contain only the expected one
 * @param timeout unused by this implementation
 * @return the envelopes for the first poll, an empty map afterwards
 */
public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {
  Map<SystemStreamPartition, List<IncomingMessageEnvelope>> result = new LinkedHashMap<>();
  assertEquals(1, systemStreamPartitions.size());
  SystemStreamPartition ssp = systemStreamPartitions.iterator().next();
  assertEquals(expectedSystemStreamPartition, ssp);

  // Only the very first poll yields messages.
  if (pollCount++ != 0) {
    return result;
  }

  SetConfig jobName = new SetConfig("test", "job.name", "my-job-name");
  SetConfig jobId = new SetConfig("test", "job.id", "1234");
  Delete deleteJobName = new Delete("test", "job.name", SetConfig.TYPE);

  List<IncomingMessageEnvelope> envelopes = new ArrayList<>();
  envelopes.add(new IncomingMessageEnvelope(ssp, null, serialize(jobName.getKeyArray()), serialize(jobName.getMessageMap())));
  envelopes.add(new IncomingMessageEnvelope(ssp, null, serialize(jobId.getKeyArray()), serialize(jobId.getMessageMap())));
  // The delete's message map is passed through without serialization, unlike the two above.
  envelopes.add(new IncomingMessageEnvelope(ssp, null, serialize(deleteJobName.getKeyArray()), deleteJobName.getMessageMap()));
  result.put(ssp, envelopes);

  return result;
}
 
Example #11
Source File: TestKinesisSystemConsumer.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Polls {@code consumer} with a one second timeout per poll until at least {@code numEvents}
 * envelopes have been collected across all partitions.
 *
 * @param ssps partitions to poll
 * @param consumer consumer to read from
 * @param numEvents minimum total number of envelopes to collect
 * @return the collected envelopes grouped by partition
 * @throws InterruptedException if a poll is interrupted
 */
private Map<SystemStreamPartition, List<IncomingMessageEnvelope>> readEvents(Set<SystemStreamPartition> ssps,
    KinesisSystemConsumer consumer, int numEvents) throws InterruptedException {
  final Map<SystemStreamPartition, List<IncomingMessageEnvelope>> messages = new HashMap<>();
  int consumed = 0;

  while (consumed < numEvents) {
    // Append each polled batch to the list already collected for its partition.
    consumer.poll(ssps, Duration.ofSeconds(1).toMillis())
        .forEach((ssp, batch) -> messages.computeIfAbsent(ssp, unused -> new ArrayList<>()).addAll(batch));
    consumed = messages.values().stream().mapToInt(List::size).sum();
  }

  // Defensive check: the loop above only exits once enough events were read,
  // so this branch cannot be taken.
  if (consumed < numEvents) {
    throw new SamzaException(String.format("Received only %d of %d events", consumed, numEvents));
  }
  return messages;
}
 
Example #12
Source File: UnboundedSourceSystem.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a reader task over the given readers.
 *
 * @param readerToSsp mapping from each reader to the partition it feeds
 * @param checkpointMarkCoder coder stored for checkpoint marks
 * @param capacity number of permits of the {@code available} semaphore
 * @param watermarkInterval watermark interval stored on the task
 * @param metricsWrapper metrics wrapper stored on the task
 */
private ReaderTask(
    Map<UnboundedReader, SystemStreamPartition> readerToSsp,
    Coder<CheckpointMarkT> checkpointMarkCoder,
    int capacity,
    long watermarkInterval,
    FnWithMetricsWrapper metricsWrapper) {
  this.readerToSsp = readerToSsp;
  this.readers = ImmutableList.copyOf(readerToSsp.keySet());
  this.checkpointMarkCoder = checkpointMarkCoder;
  this.metricsWrapper = metricsWrapper;
  this.watermarkInterval = watermarkInterval;
  this.available = new Semaphore(capacity);

  // One queue per partition, frozen into an immutable map once populated.
  final Map<SystemStreamPartition, LinkedBlockingQueue<IncomingMessageEnvelope>> queuesBySsp =
      new HashMap<>();
  for (SystemStreamPartition ssp : readerToSsp.values()) {
    queuesBySsp.put(ssp, new LinkedBlockingQueue<>());
  }
  this.queues = ImmutableMap.copyOf(queuesBySsp);
}
 
Example #13
Source File: TestMultiFileHdfsReader.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Reconnects {@code numMaxRetries} times, checking each time that the envelope read after the
 * reconnect equals the one read before it, then reconnects once more and expects a
 * {@link SamzaException}.
 */
@Test(expected = SamzaException.class)
public void testReachingMaxReconnect() {
  final int numMaxRetries = 3;
  final SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  final MultiFileHdfsReader multiReader = new MultiFileHdfsReader(
      HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0", numMaxRetries);

  // consume the first half of the events before any reconnect happens
  for (int read = 0; read < NUM_EVENTS / 2; read++) {
    multiReader.readNext();
  }

  // each allowed reconnect must resume with an envelope equal to the one read just before it
  for (int attempt = 0; attempt < numMaxRetries; attempt++) {
    IncomingMessageEnvelope beforeReconnect = multiReader.readNext();
    multiReader.reconnect();
    IncomingMessageEnvelope afterReconnect = multiReader.readNext();
    Assert.assertEquals(beforeReconnect, afterReconnect);
  }

  // one reconnect beyond the limit; reaching the fail() below means no exception was thrown
  multiReader.readNext();
  multiReader.reconnect();
  Assert.fail();
}
 
Example #14
Source File: TestStreamOperatorTask.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Pass an invalid (mocked) IME to processAsync. Any exception raised in processAsync should
 * still get propagated through the task callback, which this test observes by waiting for
 * {@code failure} to be invoked on the mocked callback.
 *
 * @throws InterruptedException if interrupted while waiting for the failure callback
 */
@Test
public void testExceptionsInProcessInvokesTaskCallback() throws InterruptedException {
  ExecutorService taskThreadPool = Executors.newFixedThreadPool(2);
  try {
    TaskCallback mockTaskCallback = mock(TaskCallback.class);
    MessageCollector mockMessageCollector = mock(MessageCollector.class);
    TaskCoordinator mockTaskCoordinator = mock(TaskCoordinator.class);
    StreamOperatorTask operatorTask = new StreamOperatorTask(mock(OperatorSpecGraph.class));
    operatorTask.setTaskThreadPool(taskThreadPool);

    CountDownLatch failureLatch = new CountDownLatch(1);

    // Release the latch as soon as the callback is told about the failure.
    doAnswer(ctx -> {
      failureLatch.countDown();
      return null;
    }).when(mockTaskCallback).failure(anyObject());

    operatorTask.processAsync(mock(IncomingMessageEnvelope.class), mockMessageCollector,
        mockTaskCoordinator, mockTaskCallback);
    failureLatch.await();
  } finally {
    // Stop the pool's worker threads so they do not outlive the test.
    taskThreadPool.shutdownNow();
  }
}
 
Example #15
Source File: HdfsSystemConsumer.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * Before delegating to the parent poll, checks the reader runnable of every requested
 * partition: if it has finished with a failure, the partition's reader is reconnected and a
 * new runnable is submitted for it.
 */
@Override
public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
  Set<SystemStreamPartition> systemStreamPartitions, long timeout)
  throws InterruptedException {
  for (SystemStreamPartition ssp : systemStreamPartitions) {
    Future<?> runnableStatus = readerRunnableStatus.get(ssp);
    if (!runnableStatus.isDone()) {
      // Reader is still running; nothing to recover.
      continue;
    }
    try {
      // A completed future only throws here when the runnable ended abnormally.
      runnableStatus.get();
    } catch (ExecutionException | InterruptedException e) {
      MultiFileHdfsReader failedReader = readers.get(ssp);
      LOG.warn(
        String.format("Detect failure in ReaderRunnable for ssp: %s. Try to reconnect now.", ssp),
        e);
      failedReader.reconnect();
      readerRunnableStatus.put(ssp, executorService.submit(new ReaderRunnable(failedReader)));
    }
  }
  return super.poll(systemStreamPartitions, timeout);
}
 
Example #16
Source File: BoundedSourceSystem.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Waits up to {@code timeoutMillis} for an envelope on the queue of {@code ssp}; when one
 * arrives, it is returned together with everything else currently queued. One permit per
 * returned envelope is released on {@code available}.
 *
 * @param ssp partition whose queue is read
 * @param timeoutMillis maximum time to wait for the first envelope, in milliseconds
 * @return the drained envelopes, empty if none arrived within the timeout
 * @throws InterruptedException if interrupted while waiting on the queue
 * @throws RuntimeException wrapping {@code lastException} if it is set before or after the read
 */
private List<IncomingMessageEnvelope> getNextMessages(
    SystemStreamPartition ssp, long timeoutMillis) throws InterruptedException {
  if (lastException != null) {
    throw new RuntimeException(lastException);
  }

  final BlockingQueue<IncomingMessageEnvelope> pending = queues.get(ssp);
  final List<IncomingMessageEnvelope> batch = new ArrayList<>();

  final IncomingMessageEnvelope head = pending.poll(timeoutMillis, TimeUnit.MILLISECONDS);
  if (head != null) {
    // Got one in time: take it plus whatever else is already waiting.
    batch.add(head);
    pending.drainTo(batch);
  }

  available.release(batch.size());

  // Re-check after the read so a failure recorded in the meantime is reported.
  if (lastException != null) {
    throw new RuntimeException(lastException);
  }

  return batch;
}
 
Example #17
Source File: StreamTaskIntegrationTest.java    From samza with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps every generated input message in an {@link IncomingMessageEnvelope} with a
 * per-partition, zero-based offset and runs the multi-partition multithreaded sync task
 * helper against those envelopes.
 */
@Test
public void testSyncTaskWithMultiplePartitionMultithreadedWithCustomIME() throws Exception {
  Map<Integer, List<KV>> inputPartitionData = new HashMap<>();
  Map<Integer, List<Integer>> expectedOutputPartitionData = new HashMap<>();
  genData(inputPartitionData, expectedOutputPartitionData);

  Map<Integer, List<KV>> inputPartitionIME = new HashMap<>();
  for (Map.Entry<Integer, List<KV>> partitionEntry : inputPartitionData.entrySet()) {
    Integer partitionId = partitionEntry.getKey();
    SystemStreamPartition ssp = new SystemStreamPartition("test", "input", new Partition(partitionId));

    // Build the partition's envelope list locally, numbering offsets from zero.
    List<KV> wrapped = new ArrayList<>();
    int nextOffset = 0;
    for (KV message : partitionEntry.getValue()) {
      IncomingMessageEnvelope ime =
          new IncomingMessageEnvelope(ssp, String.valueOf(nextOffset), message.key, message.getValue());
      wrapped.add(KV.of(message.key, ime));
      nextOffset++;
    }
    inputPartitionIME.put(partitionId, wrapped);
  }
  syncTaskWithMultiplePartitionMultithreadedHelper(inputPartitionIME, expectedOutputPartitionData);
}
 
Example #18
Source File: KinesisSystemConsumer.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the parent poll unless a callback exception has been recorded, in which case
 * that exception is rethrown wrapped in a {@link SamzaException}.
 *
 * @param ssps partitions to poll
 * @param timeout poll timeout handed to the parent implementation
 * @return the parent's poll result
 * @throws InterruptedException if the parent poll is interrupted
 */
@Override
public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
    Set<SystemStreamPartition> ssps, long timeout) throws InterruptedException {
  if (callbackException == null) {
    return super.poll(ssps, timeout);
  }
  throw new SamzaException(callbackException);
}
 
Example #19
Source File: BlockingEnvelopeMap.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Reports how many envelopes are currently buffered for {@code systemStreamPartition}.
 *
 * @return the size of the partition's buffer, or 0 when no buffer exists for it
 */
@Override
public Integer getValue() {
  final Queue<IncomingMessageEnvelope> buffered = bufferedMessages.get(systemStreamPartition);
  return buffered == null ? 0 : buffered.size();
}
 
Example #20
Source File: TestInMemoryManager.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that an envelope carries the expected key, message, offset and partition,
 * checked in that order.
 *
 * @param expectedKey expected envelope key
 * @param expectedMessage expected envelope message
 * @param expectedOffset expected envelope offset
 * @param expectedSystemStreamPartition expected envelope partition
 * @param actualIncomingMessageEnvelope envelope under inspection
 */
private static void assertIncomingMessageEnvelope(String expectedKey, String expectedMessage, String expectedOffset,
    SystemStreamPartition expectedSystemStreamPartition, IncomingMessageEnvelope actualIncomingMessageEnvelope) {
  final IncomingMessageEnvelope actual = actualIncomingMessageEnvelope;

  assertEquals(expectedKey, actual.getKey());
  assertEquals(expectedMessage, actual.getMessage());
  assertEquals(expectedOffset, actual.getOffset());
  assertEquals(expectedSystemStreamPartition, actual.getSystemStreamPartition());
}
 
Example #21
Source File: StatePerfTestTask.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the message in the store under itself as key and, every {@code LOG_INTERVAL}
 * messages, prints the throughput since the last report, resets the measurement window and
 * requests a commit for all tasks in the container.
 *
 * @param envelope envelope whose message (cast to String) is written to the store
 * @param collector unused
 * @param coordinator used to request the commit
 */
public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) {
  store.put((String) envelope.getMessage(), (String) envelope.getMessage());
  count++;

  if (count % LOG_INTERVAL != 0) {
    return;
  }

  // End of a measurement window: report, then start the next window.
  final double elapsedSecs = (System.currentTimeMillis() - start) / 1000.0;
  final double messagesPerSec = count / elapsedSecs;
  System.out.println(String.format("Throughput = %.2f messages/sec.", messagesPerSec));
  start = System.currentTimeMillis();
  count = 0;
  coordinator.commit(RequestScope.ALL_TASKS_IN_CONTAINER);
}
 
Example #22
Source File: TestBlockingEnvelopeMap.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Test double for a queue poll: records the timeout it was called with, counts down
 * {@code pollTimeoutBarrier}, and returns the fixed {@code ENVELOPE}.
 *
 * @param timeout timeout value to record
 * @param unit unused
 * @return always {@code ENVELOPE}
 */
@Override
public IncomingMessageEnvelope poll(long timeout, TimeUnit unit) {
  // Record the timeout before counting down, so it is already set when the barrier is released.
  this.timeout = timeout;

  pollTimeoutBarrier.countDown();

  return ENVELOPE;
}
 
Example #23
Source File: IdentityStreamTask.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards the incoming message unchanged to the output stream and requests a shutdown of all
 * tasks in the container once the processed count equals the expected message count.
 *
 * @param incomingMessageEnvelope envelope whose message is forwarded
 * @param messageCollector collector used to send the outgoing envelope
 * @param taskCoordinator coordinator used to request the shutdown
 */
@Override
public void process(
    IncomingMessageEnvelope incomingMessageEnvelope,
    MessageCollector messageCollector,
    TaskCoordinator taskCoordinator) throws Exception {
  final SystemStream destination = new SystemStream(outputSystem, outputTopic);
  final OutgoingMessageEnvelope outgoing =
      new OutgoingMessageEnvelope(destination, incomingMessageEnvelope.getMessage());
  messageCollector.send(outgoing);

  // Count this message, then stop once the expected total has been reached.
  if (++processedMessageCount == expectedMessageCount) {
    taskCoordinator.shutdown(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
  }
}
 
Example #24
Source File: MockCoordinatorStreamWrappedConsumer.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the parent poll, first waiting on {@code blockConsumerPoll} when
 * {@code blockpollFlag} is set.
 *
 * @param systemStreamPartitions partitions to poll
 * @param timeout poll timeout handed to the parent implementation
 * @return the parent's poll result
 * @throws InterruptedException if interrupted while waiting or polling
 */
@Override
public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
    Set<SystemStreamPartition> systemStreamPartitions, long timeout)
  throws InterruptedException {
  // Hold the caller here until the test releases the poll.
  if (blockpollFlag) {
    blockConsumerPoll.await();
  }

  final Map<SystemStreamPartition, List<IncomingMessageEnvelope>> polled =
      super.poll(systemStreamPartitions, timeout);
  return polled;
}
 
Example #25
Source File: SamzaProcessingItem.java    From samoa with Apache License 2.0 5 votes vote down vote up
/**
 * Hands the current collector to every output stream, then passes the envelope's message
 * (cast to {@code ContentEvent}) to the wrapped processor.
 *
 * @param envelope envelope carrying the content event
 * @param collector collector to install on the output streams
 * @param coordinator unused
 */
@Override
public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) throws Exception {
	// Every output stream must send through the collector of the current invocation.
	for (SamzaStream outputStream : this.outputStreams) {
		outputStream.setCollector(collector);
	}

	this.getProcessor().process((ContentEvent) envelope.getMessage());
}
 
Example #26
Source File: TestAvroFileHdfsReader.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Reads to the middle of the file, remembers the offset and record found there, and checks
 * that the same record is returned both after seeking back to that offset in the same reader
 * and after opening a fresh reader at that offset.
 */
@Test
public void testRandomRead() throws Exception {
  final SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  SingleFileHdfsReader reader = new AvroFileHdfsReader(ssp);
  reader.open(AVRO_FILE, "0");

  // skip the first half of the events
  for (int skipped = 0; skipped < NUM_EVENTS / 2; skipped++) {
    reader.readNext();
  }

  // remember the offset and the record stored at it
  final String offset = reader.nextOffset();
  IncomingMessageEnvelope envelope = reader.readNext();
  Assert.assertEquals(offset, envelope.getOffset());
  final GenericRecord expectedRecord = (GenericRecord) envelope.getMessage();

  // move further ahead so that the seek below really has to go back
  for (int skipped = 0; skipped < 5; skipped++) {
    reader.readNext();
  }

  // seek to the offset within the same reader
  reader.seek(offset);
  Assert.assertEquals(offset, reader.nextOffset());
  envelope = reader.readNext();
  Assert.assertEquals(offset, envelope.getOffset());
  final GenericRecord recordAfterSeek = (GenericRecord) envelope.getMessage();
  Assert.assertEquals(expectedRecord, recordAfterSeek);
  reader.close();

  // open a new reader and initialize it with the offset
  reader = new AvroFileHdfsReader(ssp);
  reader.open(AVRO_FILE, offset);
  envelope = reader.readNext();
  Assert.assertEquals(offset, envelope.getOffset());
  final GenericRecord recordAfterReopen = (GenericRecord) envelope.getMessage();
  Assert.assertEquals(expectedRecord, recordAfterReopen);
  reader.close();
}
 
Example #27
Source File: KinesisSystemConsumer.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Kinesis record into a {@link KinesisIncomingMessageEnvelope} for {@code ssp}.
 * The record's remaining data bytes are copied into the envelope payload, the metrics are
 * updated for the stream, and the envelope offset is built from the shard id and the record's
 * sequence number.
 *
 * @param ssp partition the record belongs to
 * @param record Kinesis record to convert
 * @return envelope carrying the record's payload and metadata
 */
private IncomingMessageEnvelope translate(SystemStreamPartition ssp, Record record) {
  final String shardId = processors.get(ssp).getShardId();
  final byte[] payload = new byte[record.getData().remaining()];

  // Metrics are updated before the payload is copied out of the record's buffer.
  metrics.updateMetrics(ssp.getStream(), record);
  record.getData().get(payload);

  final String offset = new KinesisSystemConsumerOffset(shardId, record.getSequenceNumber()).toString();
  return new KinesisIncomingMessageEnvelope(
      ssp,
      offset,
      record.getPartitionKey(),
      payload,
      shardId,
      record.getSequenceNumber(),
      record.getApproximateArrivalTimestamp());
}
 
Example #28
Source File: TestMultiFileHdfsReader.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Reads into the middle of the stream, reconnects, and checks that the envelope read after
 * the reconnect equals the one read right before it.
 */
@Test
public void testReconnect() {
  final SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  final MultiFileHdfsReader multiReader =
      new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0");

  // consume the first half of the events before reconnecting
  for (int read = 0; read < NUM_EVENTS / 2; read++) {
    multiReader.readNext();
  }

  final IncomingMessageEnvelope beforeReconnect = multiReader.readNext();
  multiReader.reconnect();
  final IncomingMessageEnvelope afterReconnect = multiReader.readNext();

  Assert.assertEquals(beforeReconnect, afterReconnect);
  multiReader.close();
}
 
Example #29
Source File: TestEndOfStreamStates.java    From samza with Apache License 2.0 5 votes vote down vote up
/**
 * Walks an {@link EndOfStreamStates} instance through its transitions: the input stream ends
 * after a single end-of-stream envelope, while the intermediate stream (registered with two
 * producers) only ends once both tasks have reported end-of-stream on both of its partitions.
 */
@Test
public void testUpdate() {
  final SystemStream input = new SystemStream("system", "input");
  final SystemStream intermediate = new SystemStream("system", "intermediate");

  final SystemStreamPartition inputPartition0 = new SystemStreamPartition(input, new Partition(0));
  final SystemStreamPartition intPartition0 = new SystemStreamPartition(intermediate, new Partition(0));
  final SystemStreamPartition intPartition1 = new SystemStreamPartition(intermediate, new Partition(1));

  final Set<SystemStreamPartition> ssps = new HashSet<>();
  ssps.add(inputPartition0);
  ssps.add(intPartition0);
  ssps.add(intPartition1);

  final Map<SystemStream, Integer> producerCounts = new HashMap<>();
  producerCounts.put(intermediate, 2);

  // nothing has ended right after construction
  final EndOfStreamStates states = new EndOfStreamStates(ssps, producerCounts);
  assertFalse(states.isEndOfStream(input));
  assertFalse(states.isEndOfStream(intermediate));
  assertFalse(states.allEndOfStream());

  // an end-of-stream envelope on the input partition ends the input stream only
  final IncomingMessageEnvelope eosEnvelope = IncomingMessageEnvelope.buildEndOfStreamEnvelope(inputPartition0);
  states.update((EndOfStreamMessage) eosEnvelope.getMessage(), eosEnvelope.getSystemStreamPartition());
  assertTrue(states.isEndOfStream(input));
  assertFalse(states.isEndOfStream(intermediate));
  assertFalse(states.allEndOfStream());

  // the first of the two producers reporting is not enough for the intermediate stream
  final EndOfStreamMessage fromTask0 = new EndOfStreamMessage("task 0");
  states.update(fromTask0, intPartition0);
  states.update(fromTask0, intPartition1);
  assertFalse(states.isEndOfStream(intermediate));
  assertFalse(states.allEndOfStream());

  // once the second producer reports as well, everything has ended
  final EndOfStreamMessage fromTask1 = new EndOfStreamMessage("task 1");
  states.update(fromTask1, intPartition0);
  states.update(fromTask1, intPartition1);
  assertTrue(states.isEndOfStream(intermediate));
  assertTrue(states.allEndOfStream());
}
 
Example #30
Source File: BlockingEnvelopeMap.java    From samza with Apache License 2.0 5 votes vote down vote up
public BlockingEnvelopeMap(MetricsRegistry metricsRegistry, Clock clock, String metricsGroupName) {
  metricsGroupName = (metricsGroupName == null) ? this.getClass().getName() : metricsGroupName;
  this.metrics = new BlockingEnvelopeMapMetrics(metricsGroupName, metricsRegistry);
  this.bufferedMessages = new ConcurrentHashMap<SystemStreamPartition, BlockingQueue<IncomingMessageEnvelope>>();
  this.noMoreMessage = new ConcurrentHashMap<SystemStreamPartition, Boolean>();
  this.clock = clock;
  this.bufferedMessagesSize = new ConcurrentHashMap<SystemStreamPartition, AtomicLong>();
}