Java Code Examples for org.apache.beam.sdk.io.UnboundedSource#UnboundedReader

The following examples show how to use org.apache.beam.sdk.io.UnboundedSource#UnboundedReader. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: WorkerCustomSources.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns an iterator over the unbounded reader for this split.
 *
 * <p>A reader cached on the context is reused when present. Otherwise a new reader is created
 * from the split source identified by the serialized key, resuming from the context's reader
 * checkpoint when the source provides a checkpoint-mark coder. The chosen reader is registered
 * on the context as the active reader.
 *
 * @return an iterator wrapping the reader; it is told whether the reader came from the cache
 * @throws IOException if creating the reader fails
 */
@Override
@SuppressWarnings("unchecked")
public NativeReaderIterator<WindowedValue<ValueWithRecordId<T>>> iterator() throws IOException {
  UnboundedSource.UnboundedReader<T> reader =
      (UnboundedSource.UnboundedReader<T>) context.getCachedReader();
  // A reader obtained from the cache is reported to the iterator as already started.
  final boolean started = (reader != null);

  if (!started) {
    // The first 16 characters of the key are parsed as a hexadecimal number
    // that is one greater than the split index.
    String serializedKey = context.getSerializedKey().toStringUtf8();
    int splitIndex = Integer.parseInt(serializedKey.substring(0, 16), 16) - 1;

    UnboundedSource<T, UnboundedSource.CheckpointMark> splitSource = parseSource(splitIndex);

    // Without a checkpoint-mark coder there is nothing to restore, so start from scratch.
    UnboundedSource.CheckpointMark checkpoint =
        splitSource.getCheckpointMarkCoder() == null
            ? null
            : context.getReaderCheckpoint(splitSource.getCheckpointMarkCoder());

    reader = splitSource.createReader(options, checkpoint);
  }

  context.setActiveReader(reader);
  return new UnboundedReaderIterator<>(reader, context, started);
}
 
Example 2
Source File: ReaderCache.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Removes and returns the cached reader for the given split, provided its cache token matches.
 *
 * <p>The entry is always <i>removed</i> from the cache; callers that want to keep the reader
 * cached must put it back with cacheReader(). Note that cache entries expire after one minute.
 * When the stored token differs from {@code cacheToken}, the cached reader is treated as stale
 * and is closed.
 *
 * @return the cached reader, or null on a cache miss or a token mismatch
 */
UnboundedSource.UnboundedReader<?> acquireReader(
    String computationId, ByteString splitId, long cacheToken) {
  KV<String, ByteString> cacheKey = KV.of(computationId, splitId);
  CacheEntry removed = cache.asMap().remove(cacheKey);

  cache.cleanUp();

  if (removed == null) {
    return null;
  }
  if (removed.token != cacheToken) {
    // A new cacheToken invalidates the old entry, so its reader is closed.
    closeReader(cacheKey, removed);
    return null;
  }
  return removed.reader;
}
 
Example 3
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Emits the reader's current element to the given source context.
 *
 * <p>The element is paired with its record id, stamped with the reader's current timestamp and
 * placed in the global window with {@code PaneInfo.NO_FIRING}. The reader is guaranteed to have
 * data.
 *
 * <p>NOTE(review): reader state updates and element emission are meant to be atomic with respect
 * to snapshots; no locking happens in this method, so callers presumably provide it — confirm.
 */
private void emitElement(
    SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx,
    UnboundedSource.UnboundedReader<OutputT> reader) {
  ValueWithRecordId<OutputT> payload =
      new ValueWithRecordId<>(reader.getCurrent(), reader.getCurrentRecordId());

  ctx.collect(
      WindowedValue.of(
          payload, reader.getCurrentTimestamp(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
}
 
Example 4
Source File: SyntheticUnboundedSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a reader resumed from a checkpoint starting at position 0 yields the record
 * generated for position 1 as its first element.
 */
@Test
public void startPositionShouldBeExclusive() throws IOException {
  final int startPosition = 0;
  checkpoint = new SyntheticRecordsCheckpoint(startPosition, sourceOptions.numRecords);
  UnboundedSource.UnboundedReader<KV<byte[], byte[]>> reader =
      source.createReader(pipeline.getOptions(), checkpoint);

  reader.start();
  KV<byte[], byte[]> actual = reader.getCurrent();

  // The first element read must be the one right after the start position.
  KV<byte[], byte[]> expected = sourceOptions.genRecord(startPosition + 1).kv;
  assertEquals(expected, actual);
}
 
Example 5
Source File: ReaderCache.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Caches the reader for a minute. It will be closed if it is not acquired within a minute.
 *
 * <p>Fails with an {@code IllegalStateException}-style precondition error if a reader is already
 * cached for the same computation and split; existing entries are never overwritten.
 */
void cacheReader(
    String computationId,
    ByteString splitId,
    long cacheToken,
    UnboundedSource.UnboundedReader<?> reader) {
  KV<String, ByteString> cacheKey = KV.of(computationId, splitId);
  CacheEntry previous = cache.asMap().putIfAbsent(cacheKey, new CacheEntry(reader, cacheToken));
  Preconditions.checkState(previous == null, "Overwriting existing readers is not allowed");
  cache.cleanUp();
}
 
Example 6
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Registers metrics for the pipeline result, closes the parent function and then closes every
 * local reader. Static caches are always cleared afterwards, even if any step fails.
 *
 * <p>All readers are closed even when one of them fails: the first failure is rethrown once the
 * loop has finished, with later failures attached as suppressed exceptions.
 *
 * @throws Exception if the parent close or any reader close fails
 */
@Override
public void close() throws Exception {
  try {
    // metricContainer is only assigned in open(); guard against close() after a failed open()
    // so that a NullPointerException does not skip the cleanup in the finally block.
    if (metricContainer != null) {
      metricContainer.registerMetricsForPipelineResult();
    }
    super.close();
    if (localReaders != null) {
      Exception firstFailure = null;
      for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) {
        try {
          reader.close();
        } catch (Exception e) {
          // Keep going so one failing reader does not leak the remaining ones.
          if (firstFailure == null) {
            firstFailure = e;
          } else {
            firstFailure.addSuppressed(e);
          }
        }
      }
      if (firstFailure != null) {
        throw firstFailure;
      }
    }
  } finally {
    Workarounds.deleteStaticCaches();
  }
}
 
Example 7
Source File: MicrobatchSource.java    From beam with Apache License 2.0 5 votes vote down vote up
private Reader(final UnboundedSource.UnboundedReader<T> unboundedReader) {
  this.unboundedReader = unboundedReader;
  backoffFactory =
      FluentBackoff.DEFAULT
          .withInitialBackoff(Duration.millis(10))
          .withMaxBackoff(maxReadTime.minus(1))
          .withMaxCumulativeBackoff(maxReadTime.minus(1));
}
 
Example 8
Source File: UnboundedSourceP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next item to emit: a watermark, a mapped element, or null.
 *
 * <p>If the minimum watermark has moved past the last one sent, a new {@code Watermark} is
 * returned first. Otherwise the readers are polled starting after the most recently used one,
 * wrapping around, for at most one full cycle; the first reader that advances has its watermark
 * bookkeeping refreshed and its mapped value returned.
 *
 * @return a {@code Watermark}, the result of {@code mapFn} for the advanced reader, or null when
 *     no reader advanced
 */
@Override
public Object next() {
  if (minWatermark > lastSentWatermark) {
    lastSentWatermark = minWatermark;
    return new Watermark(lastSentWatermark);
  }

  try {
    // Give every reader at most one chance per call, continuing after the last one used.
    for (int attempt = 0; attempt < readers.length; attempt++) {
      currentReaderIndex++;
      if (currentReaderIndex >= readers.length) {
        currentReaderIndex = 0;
      }
      UnboundedSource.UnboundedReader<InputT> candidate = readers[currentReaderIndex];
      if (!candidate.advance()) {
        continue;
      }

      long observedWatermark = candidate.getWatermark().getMillis();
      long previousWatermark = watermarks[currentReaderIndex];
      if (observedWatermark > previousWatermark) {
        // todo: we should probably do this only on a timer...
        watermarks[currentReaderIndex] = observedWatermark;
        // The minimum can only change if this reader was the one holding it.
        if (previousWatermark == minWatermark) {
          minWatermark = getMin(watermarks);
        }
      }
      return mapFn.apply(candidate);
    }

    // No reader advanced during this cycle.
    return null;
  } catch (IOException ioFailure) {
    throw ExceptionUtil.rethrow(ioFailure);
  }
}
 
Example 9
Source File: UnboundedSourceP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a traverser over several readers.
 *
 * @param readers the readers to poll; one watermark slot is initialized per reader
 * @param mapFn function that turns a reader positioned on an element into its byte[] output
 */
CoalescingTraverser(
    UnboundedSource.UnboundedReader<InputT>[] readers,
    Function<UnboundedSource.UnboundedReader<InputT>, byte[]> mapFn) {
  this.mapFn = mapFn;
  this.readers = readers;
  watermarks = initWatermarks(readers.length);
}
 
Example 10
Source File: UnboundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a test reader over {@code elems}, counting every creation in
 * {@code readerCreatedCount}.
 *
 * <p>Without a checkpoint the reader is created with index -1; otherwise with the checkpoint's
 * index. A non-null checkpoint must have been decoded, or the state check fails.
 */
@Override
public UnboundedSource.UnboundedReader<T> createReader(
    PipelineOptions options, @Nullable TestCheckpointMark checkpointMark) {
  checkState(
      checkpointMark == null || checkpointMark.decoded,
      "Cannot resume from a checkpoint that has not been decoded");
  readerCreatedCount++;

  if (checkpointMark == null) {
    return new TestUnboundedReader(elems, -1);
  }
  return new TestUnboundedReader(elems, checkpointMark.index);
}
 
Example 11
Source File: UnboundedSourceP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reader for the given shard without a checkpoint mark, passing any
 * {@code IOException} through {@code ExceptionUtil.rethrow}.
 */
private static <T> UnboundedSource.UnboundedReader<T> createReader(
    PipelineOptions options, UnboundedSource<T, ?> shard) {
  UnboundedSource.UnboundedReader<T> reader;
  try {
    // null checkpoint mark: the reader is not resumed from any checkpoint.
    reader = shard.createReader(options, null);
  } catch (IOException ioFailure) {
    throw ExceptionUtil.rethrow(ioFailure);
  }
  return reader;
}
 
Example 12
Source File: UnboundedSourceP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates one reader per shard, in the order the shards appear in the list.
 *
 * @param shards the sources to create readers for
 * @param options pipeline options handed to each reader creation
 * @return an array holding the reader for each shard at the shard's list position
 */
@SuppressWarnings("unchecked")
private static <T, CmT extends UnboundedSource.CheckpointMark>
    UnboundedSource.UnboundedReader<T>[] createReaders(
        List<? extends UnboundedSource<T, CmT>> shards, PipelineOptions options) {
  // Generic arrays cannot be created directly; the raw array is safe because only
  // readers produced from these shards are stored in it.
  UnboundedSource.UnboundedReader<T>[] readers =
      new UnboundedSource.UnboundedReader[shards.size()];
  int slot = 0;
  for (UnboundedSource<T, CmT> shard : shards) {
    readers[slot++] = createReader(options, shard);
  }
  return readers;
}
 
Example 13
Source File: SyntheticUnboundedSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that, with a checkpoint spanning positions 0 to 2, the element available after one
 * start and one advance is the record generated for the end position.
 */
@Test
public void lastElementShouldBeInclusive() throws IOException {
  final int endPosition = 2;
  checkpoint = new SyntheticRecordsCheckpoint(0, endPosition);
  UnboundedSource.UnboundedReader<KV<byte[], byte[]>> reader =
      source.createReader(pipeline.getOptions(), checkpoint);

  // start() moves to the first element, advance() to the second.
  reader.start();
  reader.advance();
  KV<byte[], byte[]> actual = reader.getCurrent();

  KV<byte[], byte[]> expected = sourceOptions.genRecord(endPosition).kv;
  assertEquals(expected, actual);
}
 
Example 14
Source File: SyntheticUnboundedSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Verifies that start() on a freshly created reader reports success. */
@Test
public void shouldStartTheReaderSuccessfully() throws IOException {
  UnboundedSource.UnboundedReader<KV<byte[], byte[]>> reader =
      source.createReader(pipeline.getOptions(), checkpoint);

  assertTrue(reader.start());
}
 
Example 15
Source File: UnboundedSourceP.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a traverser over a single reader.
 *
 * @param reader the reader to traverse
 * @param mapFn function that turns the reader into its byte[] output
 */
SingleReaderTraverser(
    UnboundedSource.UnboundedReader<InputT> reader,
    Function<UnboundedSource.UnboundedReader<InputT>, byte[]> mapFn) {
  this.mapFn = mapFn;
  this.reader = reader;
}
 
Example 16
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes this source before execution: installs the pipeline options, sets up the metric
 * container and builds the local split sources with their readers.
 *
 * <p>When restoring, sources and readers are rebuilt from the checkpointed state in its stored
 * order. Otherwise the split sources are distributed round-robin: this subtask takes every
 * source whose index modulo the number of subtasks equals its own subtask index.
 */
@Override
public void open(Configuration parameters) throws Exception {
  FileSystems.setDefaultPipelineOptions(serializedOptions.get());
  runtimeContext = (StreamingRuntimeContext) getRuntimeContext();
  metricContainer = new FlinkMetricContainer(runtimeContext);

  // Identify which share of the split sources belongs to this subtask.
  int subtaskIndex = runtimeContext.getIndexOfThisSubtask();
  int numSubtasks = runtimeContext.getNumberOfParallelSubtasks();

  localSplitSources = new ArrayList<>();
  localReaders = new ArrayList<>();
  pendingCheckpoints = new LinkedHashMap<>();

  if (!isRestored) {
    // Fresh start: build localSplitSources and localReaders from scratch.
    for (int splitIdx = 0; splitIdx < splitSources.size(); splitIdx++) {
      if (splitIdx % numSubtasks != subtaskIndex) {
        continue;
      }
      UnboundedSource<OutputT, CheckpointMarkT> source = splitSources.get(splitIdx);
      UnboundedSource.UnboundedReader<OutputT> reader =
          source.createReader(serializedOptions.get(), null);
      localSplitSources.add(source);
      localReaders.add(reader);
    }
  } else {
    // Take the split sources from the checkpoint so their ordering stays consistent.
    for (KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> restored :
        stateForCheckpoint.get()) {
      UnboundedSource<OutputT, CheckpointMarkT> restoredSource = restored.getKey();
      localSplitSources.add(restoredSource);
      localReaders.add(
          restoredSource.createReader(serializedOptions.get(), restored.getValue()));
    }
  }

  LOG.info(
      "Unbounded Flink Source {}/{} is reading from sources: {}",
      subtaskIndex + 1,
      numSubtasks,
      localSplitSources);
}
 
Example 17
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Snapshots the checkpoint mark of every local reader together with its source.
 *
 * <p>Nothing is done when the source is no longer running or when no checkpoint coder is
 * available. Otherwise the checkpoint state is replaced with the current (source, mark) pairs and
 * the marks are recorded as pending under the snapshot's checkpoint id, after the oldest pending
 * entries have been evicted to make room.
 */
@Override
public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
  if (!isRunning) {
    LOG.debug("snapshotState() called on closed source");
    return;
  }
  if (checkpointCoder == null) {
    // This source has no checkpoint coder, so there is nothing to snapshot.
    return;
  }

  stateForCheckpoint.clear();

  long checkpointId = functionSnapshotContext.getCheckpointId();

  // The sources are checkpointed along with their CheckpointMarkT so that
  // the mapping of checkpoints to sources is correct when restoring.
  List<CheckpointMarkT> checkpointMarks = new ArrayList<>(localSplitSources.size());

  for (int idx = 0; idx < localSplitSources.size(); idx++) {
    UnboundedSource<OutputT, CheckpointMarkT> source = localSplitSources.get(idx);
    UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(idx);

    @SuppressWarnings("unchecked")
    CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark();
    checkpointMarks.add(mark);
    KV<UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> sourceWithMark =
        KV.of(source, mark);
    stateForCheckpoint.add(sourceWithMark);
  }

  // Evict the oldest pending checkpoints until there is room for the new one.
  int excess = pendingCheckpoints.size() - MAX_NUMBER_PENDING_CHECKPOINTS;
  Iterator<Long> oldestFirst = pendingCheckpoints.keySet().iterator();
  while (excess >= 0) {
    oldestFirst.next();
    oldestFirst.remove();
    excess--;
  }
  pendingCheckpoints.put(checkpointId, checkpointMarks);
}
 
Example 18
Source File: StreamingModeExecutionContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Records the given reader as the active reader.
 *
 * <p>The state check fails if an active reader is already set; it must not be overwritten.
 */
public void setActiveReader(UnboundedSource.UnboundedReader<?> reader) {
  checkState(this.activeReader == null, "not expected to be overwritten");
  this.activeReader = reader;
}
 
Example 19
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the list of local readers held by this source.
 *
 * <p>Exposed only so that tests can inspect it. Must not be used for anything else.
 */
@VisibleForTesting
List<UnboundedSource.UnboundedReader<OutputT>> getLocalReaders() {
  return this.localReaders;
}
 
Example 20
Source File: ReaderCache.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a cache entry pairing a reader with its cache token.
 *
 * @param reader the reader held by this entry
 * @param token the cache token stored alongside the reader
 */
CacheEntry(UnboundedSource.UnboundedReader<?> reader, long token) {
  this.token = token;
  this.reader = reader;
}