Java Code Examples for org.apache.beam.sdk.io.UnboundedSource#CheckpointMark

The following examples show how to use org.apache.beam.sdk.io.UnboundedSource#CheckpointMark. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WorkerCustomSources.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the split at position {@code index} from the serialized source splits stored in
 * {@code spec}.
 *
 * @param index zero-based position of the split to decode
 * @return the decoded split, cast to an {@link UnboundedSource}
 * @throws RuntimeException if reading the serialized splits from the spec fails
 * @throws IllegalArgumentException if the spec holds no splits, {@code index} is out of range, or
 *     the decoded object is {@code null} or not an {@link UnboundedSource}
 */
@SuppressWarnings("unchecked")
private UnboundedSource<T, UnboundedSource.CheckpointMark> parseSource(int index) {
  final List<String> serializedSplits;
  try {
    serializedSplits = getStrings(spec, SERIALIZED_SOURCE_SPLITS, null);
  } catch (Exception e) {
    throw new RuntimeException("Parsing serialized source splits failed: ", e);
  }
  checkArgument(serializedSplits != null, "UnboundedSource object did not contain splits");
  // Reject negative indices here; otherwise they pass the upper-bound check below and only fail
  // inside List.get with an unrelated IndexOutOfBoundsException.
  checkArgument(index >= 0, "UnboundedSource split index must be non-negative, was %s", index);
  checkArgument(
      index < serializedSplits.size(),
      "UnboundedSource splits contained too few splits.  Requested index was %s, size was %s",
      index,
      serializedSplits.size());
  Object rawSource =
      deserializeFromByteArray(
          decodeBase64(serializedSplits.get(index)), "UnboundedSource split");
  if (!(rawSource instanceof UnboundedSource)) {
    // instanceof is false for null, so guard getClass() to avoid masking the problem with an NPE.
    throw new IllegalArgumentException(
        "Expected UnboundedSource, got " + (rawSource == null ? "null" : rawSource.getClass()));
  }
  return (UnboundedSource<T, UnboundedSource.CheckpointMark>) rawSource;
}
 
Example 2
Source File: WorkerCustomSources.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Returns an iterator backed by the unbounded reader for the current work item.
 *
 * <p>A reader cached in the context is reused and reported as already started. Otherwise a new
 * reader is created for the split named by the serialized key, restoring the reader checkpoint
 * from the context when the split source has a checkpoint mark coder.
 *
 * @throws IOException if creating the reader fails
 */
@Override
@SuppressWarnings("unchecked")
public NativeReaderIterator<WindowedValue<ValueWithRecordId<T>>> iterator() throws IOException {
  UnboundedSource.UnboundedReader<T> cachedReader =
      (UnboundedSource.UnboundedReader<T>) context.getCachedReader();
  if (cachedReader != null) {
    // A cached reader has been handed out before, so the iterator is told it is started.
    context.setActiveReader(cachedReader);
    return new UnboundedReaderIterator<>(cachedReader, context, true);
  }

  // The first 16 characters of the key are parsed as a hexadecimal number; the split index is
  // that number minus one.
  String serializedKey = context.getSerializedKey().toStringUtf8();
  int splitIndex = Integer.parseInt(serializedKey.substring(0, 16), 16) - 1;

  UnboundedSource<T, UnboundedSource.CheckpointMark> splitSource = parseSource(splitIndex);

  // Only sources that provide a checkpoint coder can have a checkpoint restored.
  UnboundedSource.CheckpointMark restoredCheckpoint =
      splitSource.getCheckpointMarkCoder() == null
          ? null
          : context.getReaderCheckpoint(splitSource.getCheckpointMarkCoder());

  UnboundedSource.UnboundedReader<T> freshReader =
      splitSource.createReader(options, restoredCheckpoint);
  context.setActiveReader(freshReader);
  return new UnboundedReaderIterator<>(freshReader, context, false);
}
 
Example 3
Source File: StreamingModeExecutionContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the reader checkpoint carried in the source state of the current work item.
 *
 * @param coder coder used to decode the stored checkpoint bytes
 * @return the decoded checkpoint mark, or {@code null} when the stored state is empty
 * @throws RuntimeException wrapping the {@link IOException} if decoding fails
 */
public UnboundedSource.CheckpointMark getReaderCheckpoint(
    Coder<? extends UnboundedSource.CheckpointMark> coder) {
  try {
    ByteString encodedCheckpoint = work.getSourceState().getState();
    if (!encodedCheckpoint.isEmpty()) {
      return coder.decode(encodedCheckpoint.newInput(), Coder.Context.OUTER);
    }
    // Empty state: there is no checkpoint to restore.
    return null;
  } catch (IOException decodeFailure) {
    throw new RuntimeException("Exception while decoding checkpoint", decodeFailure);
  }
}
 
Example 4
Source File: TalendIO.java    From component-runtime with Apache License 2.0 5 votes vote down vote up
/**
 * Splits this source by delegating to the mapper and wrapping every element it returns in an
 * {@code UnBoundedSourceImpl}.
 *
 * <p>The mapper is started before splitting and stopped afterwards, whether or not splitting
 * succeeds.
 */
@Override
public List<? extends UnboundedSource<Record, UnboundedSource.CheckpointMark>> split(final int desiredNumSplits,
        final PipelineOptions options) {
    mapper.start();
    try {
        final List<? extends UnboundedSource<Record, UnboundedSource.CheckpointMark>> sources = mapper
                .split(desiredNumSplits)
                .stream()
                .map(UnBoundedSourceImpl::new)
                .collect(toList());
        return sources;
    } finally {
        mapper.stop();
    }
}
 
Example 5
Source File: UnboundedSourceP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link ProcessorMetaSupplier} for the given unbounded source.
 *
 * <p>All four arguments are passed unchanged to {@code UnboundedSourceProcessorMetaSupplier}.
 *
 * @param unboundedSource the source to read from
 * @param options serializable pipeline options
 * @param outputCoder coder forwarded to the meta supplier
 * @param ownerId identifier forwarded to the meta supplier
 * @return the newly created meta supplier
 */
public static <T, CmT extends UnboundedSource.CheckpointMark> ProcessorMetaSupplier supplier(
    UnboundedSource<T, CmT> unboundedSource,
    SerializablePipelineOptions options,
    Coder outputCoder,
    String ownerId) {
  final ProcessorMetaSupplier metaSupplier =
      new UnboundedSourceProcessorMetaSupplier<>(unboundedSource, options, outputCoder, ownerId);
  return metaSupplier;
}
 
Example 6
Source File: UnboundedSourceP.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates one reader per shard, in the order the shards appear in the list.
 *
 * @param shards the sources to create readers for
 * @param options pipeline options passed to each reader creation
 * @return an array holding the reader for each shard, at the same position as its shard
 */
@SuppressWarnings("unchecked")
private static <T, CmT extends UnboundedSource.CheckpointMark>
    UnboundedSource.UnboundedReader<T>[] createReaders(
        List<? extends UnboundedSource<T, CmT>> shards, PipelineOptions options) {
  // Generic arrays cannot be instantiated directly, hence the raw array and the suppression.
  UnboundedSource.UnboundedReader<T>[] readers =
      new UnboundedSource.UnboundedReader[shards.size()];
  int position = 0;
  for (UnboundedSource<T, CmT> shard : shards) {
    readers[position++] = createReader(options, shard);
  }
  return readers;
}
 
Example 7
Source File: RabbitMqIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the checkpoint mark held in {@code checkpointMark}.
 *
 * <p>The reference is handed back as-is; no copy is made here.
 */
@Override
public UnboundedSource.CheckpointMark getCheckpointMark() {
  return checkpointMark;
}
 
Example 8
Source File: ReadSourceTranslatorStreaming.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates an unbounded read transform into a Spark streaming dataset and registers it as the
 * dataset of the transform's output.
 *
 * <p>The source is serialized to a Base64 string and passed, together with the default
 * parallelism and the pipeline options, as options to the streaming source provider. The rows
 * produced are then mapped to windowed values using a coder built from the source's output coder
 * and the global window coder.
 *
 * @param transform the read transform being translated
 * @param context translation context supplying the current transform, Spark session and output
 * @throws RuntimeException wrapping the {@link IOException} if the source cannot be extracted
 */
@SuppressWarnings("unchecked")
@Override
public void translateTransform(
    PTransform<PBegin, PCollection<T>> transform, TranslationContext context) {
  AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> appliedRead =
      (AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>)
          context.getCurrentTransform();

  UnboundedSource<T, UnboundedSource.CheckpointMark> source;
  try {
    source = ReadTranslation.unboundedSourceFromTransform(appliedRead);
  } catch (IOException extractionFailure) {
    throw new RuntimeException(extractionFailure);
  }
  SparkSession sparkSession = context.getSparkSession();

  // Every value handed to the streaming source provider travels as a string option.
  String serializedSource = Base64Serializer.serializeUnchecked(source);
  String defaultParallelism =
      String.valueOf(context.getSparkSession().sparkContext().defaultParallelism());
  String serializedOptions = context.getSerializableOptions().toString();

  Dataset<Row> rows =
      sparkSession
          .readStream()
          .format(sourceProviderClass)
          .option(DatasetSourceStreaming.BEAM_SOURCE_OPTION, serializedSource)
          .option(DatasetSourceStreaming.DEFAULT_PARALLELISM, defaultParallelism)
          .option(DatasetSourceStreaming.PIPELINE_OPTIONS, serializedOptions)
          .load();

  // Turn each Row back into a WindowedValue using the source's output coder.
  WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(
          source.getOutputCoder(), GlobalWindow.Coder.INSTANCE);
  Dataset<WindowedValue<T>> windowedValues =
      rows.map(
          RowHelpers.extractWindowedValueFromRowMapFunction(windowedValueCoder),
          EncoderHelpers.fromBeamCoder(windowedValueCoder));

  context.putDataset((PCollection<T>) context.getOutput(), windowedValues);
}
 
Example 9
Source File: ReadTranslation.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Rebuilds the {@link UnboundedSource} described by the read payload of the given transform.
 *
 * <p>The result of {@code unboundedSourceFromProto} is cast to the caller's requested type
 * parameters; the cast itself does not verify those type arguments.
 *
 * @param transform the applied read transform whose payload describes the source
 * @return the source, typed as {@code UnboundedSource<T, CheckpointT>}
 * @throws IOException propagated from reading the payload or rebuilding the source
 */
public static <T, CheckpointT extends UnboundedSource.CheckpointMark>
    UnboundedSource<T, CheckpointT> unboundedSourceFromTransform(
        AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> transform)
        throws IOException {
  UnboundedSource<T, CheckpointT> typedSource =
      (UnboundedSource<T, CheckpointT>) unboundedSourceFromProto(getReadPayload(transform));
  return typedSource;
}
 
Example 10
Source File: MqttIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the checkpoint mark held in {@code checkpointMark}.
 *
 * <p>The reference is handed back as-is; no copy is made here.
 */
@Override
public UnboundedSource.CheckpointMark getCheckpointMark() {
  return checkpointMark;
}
 
Example 11
Source File: MyUnboundedReader.java    From hazelcast-jet-demos with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a reader bound to the given source and file name, and opens a new watch service on
 * the default file system.
 *
 * @param arg0 the unbounded source stored in {@code unboundedSource}
 * @param arg1 the file name stored in {@code fileName}
 * @throws IOException if the watch service cannot be created
 */
public MyUnboundedReader(UnboundedSource<String, UnboundedSource.CheckpointMark> arg0, String arg1)
	throws IOException {
	this.unboundedSource = arg0;
	this.fileName = arg1;
	// NOTE(review): the watch service is a closeable resource; confirm it is closed when the
	// reader is closed (not visible from this constructor).
	this.watchService = FileSystems.getDefault().newWatchService();
}
 
Example 12
Source File: AmqpIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the checkpoint mark held in {@code checkpointMark}.
 *
 * <p>The reference is handed back as-is; no copy is made here.
 */
@Override
public UnboundedSource.CheckpointMark getCheckpointMark() {
  return checkpointMark;
}
 
Example 13
Source File: KinesisReader.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the checkpoint mark reported by {@code shardReadersPool}.
 */
@Override
public UnboundedSource.CheckpointMark getCheckpointMark() {
  final UnboundedSource.CheckpointMark poolCheckpoint = shardReadersPool.getCheckpointMark();
  return poolCheckpoint;
}
 
Example 14
Source File: TalendIO.java    From component-runtime with Apache License 2.0 4 votes vote down vote up
/**
 * Always returns {@code UnboundedSource.CheckpointMark.NOOP_CHECKPOINT_MARK}; this reader keeps
 * no checkpoint state of its own.
 */
@Override // we can add a @Checkpoint method on the emitter if needed, let's start without
public UnboundedSource.CheckpointMark getCheckpointMark() {
    return UnboundedSource.CheckpointMark.NOOP_CHECKPOINT_MARK;
}
 
Example 15
Source File: TalendIO.java    From component-runtime with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a reader for this source around a fresh instance obtained from {@code mapper.create()}.
 *
 * <p>Neither {@code options} nor {@code checkpointMark} is consulted.
 *
 * @param options pipeline options (unused)
 * @param checkpointMark checkpoint to resume from (unused)
 * @return a new {@code UnBoundedReaderImpl}
 */
@Override
public UnboundedReader<Record> createReader(final PipelineOptions options,
        final UnboundedSource.CheckpointMark checkpointMark) {
    final UnboundedReader<Record> reader = new UnBoundedReaderImpl<>(this, mapper.create());
    return reader;
}
 
Example 16
Source File: NoCheckpointCoder.java    From component-runtime with Apache License 2.0 4 votes vote down vote up
/**
 * Always returns {@code UnboundedSource.CheckpointMark.NOOP_CHECKPOINT_MARK}.
 *
 * <p>Nothing is read from {@code inStream}.
 *
 * @param inStream the input stream (not read)
 * @return the no-op checkpoint mark constant
 */
@Override
public UnboundedSource.CheckpointMark decode(final InputStream inStream) {
    return UnboundedSource.CheckpointMark.NOOP_CHECKPOINT_MARK;
}
 
Example 17
Source File: NoCheckpointCoder.java    From component-runtime with Apache License 2.0 4 votes vote down vote up
/**
 * Intentionally writes nothing: {@code value} is ignored and {@code outStream} is left untouched.
 *
 * @param value the checkpoint mark (ignored)
 * @param outStream the output stream (not written to)
 */
@Override
public void encode(final UnboundedSource.CheckpointMark value, final OutputStream outStream) {
    // no-op
}
 
Example 18
Source File: InMemoryQueueIO.java    From component-runtime with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new {@code UnboundedQueuedReader} for this source.
 *
 * <p>Neither {@code options} nor {@code checkpointMark} is consulted.
 *
 * @param options pipeline options (unused)
 * @param checkpointMark checkpoint to resume from (unused)
 * @return a new reader bound to this source
 */
@Override
public UnboundedReader<Record> createReader(final PipelineOptions options,
        final UnboundedSource.CheckpointMark checkpointMark) {
    final UnboundedReader<Record> queuedReader = new UnboundedQueuedReader(this);
    return queuedReader;
}