org.apache.beam.sdk.io.UnboundedSource Java Examples

The following examples show how to use org.apache.beam.sdk.io.UnboundedSource. Each example is taken from an open-source project; the source file, originating project, and license are noted above it.
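A minimal usage sketch before the examples: an UnboundedSource is normally consumed through Read.from(...), and the otherwise-infinite read can be bounded with withMaxNumRecords so a test pipeline can terminate, a pattern several examples below rely on. Here, mySource stands in for a hypothetical UnboundedSource implementation, not one of the classes shown below.

// Minimal sketch: consuming an UnboundedSource in a pipeline.
// `mySource` is a hypothetical UnboundedSource<String, ?> implementation.
Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());
PCollection<String> records =
    pipeline.apply(Read.from(mySource).withMaxNumRecords(1000)); // bound the read so the pipeline can finish
pipeline.run().waitUntilFinish();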
Example #1
Source File: UnboundedSourceSystem.java    From beam with Apache License 2.0
/**
 * For better parallelism in Samza, we need to configure a large split number for {@link
 * UnboundedSource} like Kafka. This will most likely make each split contain a single partition,
 * and be assigned to a Samza task. A large split number is safe since the actual split is bounded
 * by the number of source partitions.
 */
private static <T, CheckpointMarkT extends CheckpointMark>
    List<UnboundedSource<T, CheckpointMarkT>> split(
        UnboundedSource<T, CheckpointMarkT> source, SamzaPipelineOptions pipelineOptions)
        throws Exception {
  final int numSplits = pipelineOptions.getMaxSourceParallelism();
  if (numSplits > 1) {
    @SuppressWarnings("unchecked")
    final List<UnboundedSource<T, CheckpointMarkT>> splits =
        (List<UnboundedSource<T, CheckpointMarkT>>) source.split(numSplits, pipelineOptions);
    // Need the empty check here because Samza doesn't handle empty partitions well
    if (!splits.isEmpty()) {
      return splits;
    }
  }
  return Collections.singletonList(source);
}
 
Example #2
Source File: UnboundedSourceSystem.java    From beam with Apache License 2.0
@Override
public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames) {
  return streamNames.stream()
      .collect(
          Collectors.toMap(
              Function.<String>identity(),
              streamName -> {
                try {
                  final List<UnboundedSource<T, CheckpointMarkT>> splits =
                      split(source, pipelineOptions);
                  final Map<Partition, SystemStreamPartitionMetadata> partitionMetaData =
                      new HashMap<>();
                  // We assume the generated splits are stable; this is necessary
                  // so that the mapping of partition to source is consistent in
                  // each container.
                  for (int i = 0; i < splits.size(); i++) {
                    partitionMetaData.put(
                        new Partition(i), new SystemStreamPartitionMetadata(null, null, null));
                  }
                  return new SystemStreamMetadata(streamName, partitionMetaData);
                } catch (Exception e) {
                  throw new SamzaException("Failed to read stream metadata", e);
                }
              }));
}
 
Example #3
Source File: ReaderCache.java    From beam with Apache License 2.0
/**
 * If there is a cached reader for this split and the cache token matches, the reader is
 * <i>removed</i> from the cache and returned; re-cache it with cacheReader() when done. Note
 * that cached entries expire after one minute. If cacheToken does not match the token already
 * cached, the cached reader (if any) is assumed to be no longer relevant and is closed.
 * Returns null on a cache miss.
 */
UnboundedSource.UnboundedReader<?> acquireReader(
    String computationId, ByteString splitId, long cacheToken) {
  KV<String, ByteString> key = KV.of(computationId, splitId);
  CacheEntry entry = cache.asMap().remove(key);

  cache.cleanUp();

  if (entry != null) {
    if (entry.token == cacheToken) {
      return entry.reader;
    } else { // new cacheToken invalidates old one. close the reader.
      closeReader(key, entry);
    }
  }
  return null;
}
 
Example #4
Source File: TalendIO.java    From component-runtime with Apache License 2.0
@Override
public PCollection<Record> expand(final PBegin incoming) {
    PTransform<PBegin, PCollection<Record>> unbounded =
            org.apache.beam.sdk.io.Read.from(new UnBoundedSourceImpl(delegate));
    if (maxRecords > 0) {
        unbounded = ((org.apache.beam.sdk.io.Read.Unbounded<Record>) unbounded).withMaxNumRecords(maxRecords);
    }
    if (maxDurationMs > 0) {
        if (org.apache.beam.sdk.io.Read.Unbounded.class.isInstance(unbounded)) {
            unbounded = ((org.apache.beam.sdk.io.Read.Unbounded<Record>) unbounded)
                    .withMaxReadTime(Duration.millis(maxDurationMs));
        } else { // maxRecords set
            unbounded = ((BoundedReadFromUnboundedSource<Record>) unbounded)
                    .withMaxReadTime(Duration.millis(maxDurationMs));
        }
    }
    return incoming.apply(unbounded);
}
 
Example #5
Source File: WorkerCustomSources.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private UnboundedSource<T, UnboundedSource.CheckpointMark> parseSource(int index) {
  List<String> serializedSplits = null;
  try {
    serializedSplits = getStrings(spec, SERIALIZED_SOURCE_SPLITS, null);
  } catch (Exception e) {
    throw new RuntimeException("Parsing serialized source splits failed: ", e);
  }
  checkArgument(serializedSplits != null, "UnboundedSource object did not contain splits");
  checkArgument(
      index < serializedSplits.size(),
      "UnboundedSource splits contained too few splits.  Requested index was %s, size was %s",
      index,
      serializedSplits.size());
  Object rawSource =
      deserializeFromByteArray(
          decodeBase64(serializedSplits.get(index)), "UnboundedSource split");
  if (!(rawSource instanceof UnboundedSource)) {
    throw new IllegalArgumentException("Expected UnboundedSource, got " + rawSource.getClass());
  }
  return (UnboundedSource<T, UnboundedSource.CheckpointMark>) rawSource;
}
 
Example #6
Source File: WorkerCustomSources.java    From beam with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public NativeReaderIterator<WindowedValue<ValueWithRecordId<T>>> iterator() throws IOException {
  UnboundedSource.UnboundedReader<T> reader =
      (UnboundedSource.UnboundedReader<T>) context.getCachedReader();
  final boolean started = reader != null;
  if (reader == null) {
    String key = context.getSerializedKey().toStringUtf8();
    // Key is expected to be a 16-character zero-padded hex integer encoding the 1-based split index.
    int splitIndex = Integer.parseInt(key.substring(0, 16), 16) - 1;

    UnboundedSource<T, UnboundedSource.CheckpointMark> splitSource = parseSource(splitIndex);

    UnboundedSource.CheckpointMark checkpoint = null;
    if (splitSource.getCheckpointMarkCoder() != null) {
      checkpoint = context.getReaderCheckpoint(splitSource.getCheckpointMarkCoder());
    }

    reader = splitSource.createReader(options, checkpoint);
  }

  context.setActiveReader(reader);

  return new UnboundedReaderIterator<>(reader, context, started);
}
 
Example #7
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0
@Test
@Category(NeedsRunner.class)
public void testBoundedToUnboundedSourceAdapter() throws Exception {
  long numElements = 100;
  BoundedSource<Long> boundedSource = CountingSource.upTo(numElements);
  UnboundedSource<Long, Checkpoint<Long>> unboundedSource =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);

  PCollection<Long> output = p.apply(Read.from(unboundedSource).withMaxNumRecords(numElements));

  // Count == numElements
  PAssert.thatSingleton(output.apply("Count", Count.globally())).isEqualTo(numElements);
  // Unique count == numElements
  PAssert.thatSingleton(output.apply(Distinct.create()).apply("UniqueCount", Count.globally()))
      .isEqualTo(numElements);
  // Min == 0
  PAssert.thatSingleton(output.apply("Min", Min.globally())).isEqualTo(0L);
  // Max == numElements-1
  PAssert.thatSingleton(output.apply("Max", Max.globally())).isEqualTo(numElements - 1);
  p.run();
}
 
Example #8
Source File: DatasetSourceStreaming.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private DatasetMicroBatchReader(String checkpointLocation, DataSourceOptions options) {
  if (!options.get(BEAM_SOURCE_OPTION).isPresent()) {
    throw new RuntimeException("Beam source was not set in DataSource options");
  }
  this.source =
      Base64Serializer.deserializeUnchecked(
          options.get(BEAM_SOURCE_OPTION).get(), UnboundedSource.class);

  if (!options.get(DEFAULT_PARALLELISM).isPresent()) {
    throw new RuntimeException("Spark default parallelism was not set in DataSource options");
  }
  this.numPartitions = Integer.parseInt(options.get(DEFAULT_PARALLELISM).get());
  checkArgument(numPartitions > 0, "Number of partitions must be greater than zero.");

  if (!options.get(PIPELINE_OPTIONS).isPresent()) {
    throw new RuntimeException("Beam pipelineOptions were not set in DataSource options");
  }
  this.serializablePipelineOptions =
      new SerializablePipelineOptions(options.get(PIPELINE_OPTIONS).get());
}
 
Example #9
Source File: MicrobatchSource.java    From beam with Apache License 2.0
MicrobatchSource(
    final UnboundedSource<T, CheckpointMarkT> source,
    final Duration maxReadTime,
    final int numInitialSplits,
    final long maxNumRecords,
    final int splitId,
    final int sourceId,
    final double readerCacheInterval) {
  this.source = source;
  this.maxReadTime = maxReadTime;
  this.numInitialSplits = numInitialSplits;
  this.maxNumRecords = maxNumRecords;
  this.splitId = splitId;
  this.sourceId = sourceId;
  this.readerCacheInterval = readerCacheInterval;
}
 
Example #10
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0
/** Emit the current element from the given Reader. The reader is guaranteed to have data. */
private void emitElement(
    SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx,
    UnboundedSource.UnboundedReader<OutputT> reader) {
  // make sure that reader state update and element emission are atomic
  // with respect to snapshots
  OutputT item = reader.getCurrent();
  byte[] recordId = reader.getCurrentRecordId();
  Instant timestamp = reader.getCurrentTimestamp();

  WindowedValue<ValueWithRecordId<OutputT>> windowedValue =
      WindowedValue.of(
          new ValueWithRecordId<>(item, recordId),
          timestamp,
          GlobalWindow.INSTANCE,
          PaneInfo.NO_FIRING);
  ctx.collect(windowedValue);
}
 
Example #11
Source File: UnboundedSourceWrapper.java    From beam with Apache License 2.0
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
  if (checkpointCoder == null) {
    // no checkpoint coder available in this source
    return;
  }

  OperatorStateStore stateStore = context.getOperatorStateStore();
  @SuppressWarnings("unchecked")
  CoderTypeInformation<KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT>>
      typeInformation = (CoderTypeInformation) new CoderTypeInformation<>(checkpointCoder);
  stateForCheckpoint =
      stateStore.getOperatorState(
          new ListStateDescriptor<>(
              DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME,
              typeInformation.createSerializer(new ExecutionConfig())));

  if (context.isRestored()) {
    isRestored = true;
    LOG.info("Restoring state in the UnboundedSourceWrapper.");
  } else {
    LOG.info("No restore state for UnboundedSourceWrapper.");
  }
}
 
Example #12
Source File: UnboundedSourceP.java    From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private static <T, CmT extends UnboundedSource.CheckpointMark>
    UnboundedSource.UnboundedReader<T>[] createReaders(
        List<? extends UnboundedSource<T, CmT>> shards, PipelineOptions options) {
  return shards.stream()
      .map(shard -> createReader(options, shard))
      .toArray(UnboundedSource.UnboundedReader[]::new);
}
 
Example #13
Source File: MicrobatchSource.java    From beam with Apache License 2.0
private Reader(final UnboundedSource.UnboundedReader<T> unboundedReader) {
  this.unboundedReader = unboundedReader;
  backoffFactory =
      FluentBackoff.DEFAULT
          .withInitialBackoff(Duration.millis(10))
          .withMaxBackoff(maxReadTime.minus(1))
          .withMaxCumulativeBackoff(maxReadTime.minus(1));
}
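For context, a backoff factory like the one above is typically consumed by drawing a BackOff and sleeping between unsuccessful advance() calls. The loop below is an illustrative sketch of that pattern, not the surrounding class's actual read loop; BackOffUtils.next sleeps and returns false once the cumulative backoff budget is exhausted.

// Sketch: poll the reader with backoff until it yields a record or the backoff budget runs out.
BackOff backOff = backoffFactory.backoff();
boolean hasRecord = unboundedReader.advance();
while (!hasRecord && BackOffUtils.next(Sleeper.DEFAULT, backOff)) {
  hasRecord = unboundedReader.advance();
}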
 
Example #14
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0
@Test
public void testInvokingSplitProducesAtLeastOneSplit() throws Exception {
  UnboundedSource<Long, ?> unboundedCountingSource =
      new BoundedToUnboundedSourceAdapter<Long>(CountingSource.upTo(0));
  PipelineOptions options = PipelineOptionsFactory.create();
  List<?> splits = unboundedCountingSource.split(100, options);
  assertEquals(1, splits.size());
  assertNotEquals(splits.get(0), unboundedCountingSource);
}
 
Example #15
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0
@Test
public void testInvokesSplitWithDefaultNumSplitsTooLarge() throws Exception {
  UnboundedSource<Long, ?> unboundedCountingSource =
      new BoundedToUnboundedSourceAdapter<Long>(CountingSource.upTo(1));
  PipelineOptions options = PipelineOptionsFactory.create();
  List<?> splits = unboundedCountingSource.split(100, options);
  assertEquals(1, splits.size());
  assertNotEquals(splits.get(0), unboundedCountingSource);
}
 
Example #16
Source File: UnboundedSourceP.java    From beam with Apache License 2.0
@Override
public Object next() {
  if (minWatermark > lastSentWatermark) {
    lastSentWatermark = minWatermark;
    return new Watermark(lastSentWatermark);
  }

  try {
    // try each reader in round-robin order until one yields a value
    for (int i = 0; i < readers.length; i++) {
      currentReaderIndex++;
      if (currentReaderIndex >= readers.length) {
        currentReaderIndex = 0;
      }
      UnboundedSource.UnboundedReader<InputT> currentReader = readers[currentReaderIndex];
      if (currentReader.advance()) {
        long currentWatermark = currentReader.getWatermark().getMillis();
        long origWatermark = watermarks[currentReaderIndex];
        if (currentWatermark > origWatermark) {
          watermarks[currentReaderIndex] =
              currentWatermark; // todo: we should probably do this only on a timer...
          if (origWatermark == minWatermark) {
            minWatermark = getMin(watermarks);
          }
        }
        return mapFn.apply(currentReader);
      }
    }

    // all advances have failed
    return null;
  } catch (IOException e) {
    throw ExceptionUtil.rethrow(e);
  }
}
 
Example #17
Source File: UnboundedSourceP.java    From beam with Apache License 2.0
private static void stopReader(UnboundedSource.UnboundedReader<?> reader) {
  try {
    reader.close();
  } catch (IOException e) {
    throw ExceptionUtil.rethrow(e);
  }
}
 
Example #18
Source File: UnboundedSourceP.java    From beam with Apache License 2.0
public static <T, CmT extends UnboundedSource.CheckpointMark> ProcessorMetaSupplier supplier(
    UnboundedSource<T, CmT> unboundedSource,
    SerializablePipelineOptions options,
    Coder outputCoder,
    String ownerId) {
  return new UnboundedSourceProcessorMetaSupplier<>(
      unboundedSource, options, outputCoder, ownerId);
}
 
Example #19
Source File: UnboundedSourceP.java    From beam with Apache License 2.0
private UnboundedSourceProcessorMetaSupplier(
    UnboundedSource<T, CmT> unboundedSource,
    SerializablePipelineOptions options,
    Coder outputCoder,
    String ownerId) {
  this.unboundedSource = unboundedSource;
  this.options = options;
  this.outputCoder = outputCoder;
  this.ownerId = ownerId;
}
 
Example #20
Source File: UnboundedSourceP.java    From beam with Apache License 2.0
CoalescingTraverser(
    UnboundedSource.UnboundedReader<InputT>[] readers,
    Function<UnboundedSource.UnboundedReader<InputT>, byte[]> mapFn) {
  this.readers = readers;
  watermarks = initWatermarks(readers.length);
  this.mapFn = mapFn;
}
 
Example #21
Source File: UnboundedSourceP.java    From beam with Apache License 2.0
private static <T> UnboundedSource.UnboundedReader<T> createReader(
    PipelineOptions options, UnboundedSource<T, ?> shard) {
  try {
    return shard.createReader(options, null);
  } catch (IOException e) {
    throw ExceptionUtil.rethrow(e);
  }
}
 
Example #22
Source File: ReadTranslationTest.java    From beam with Apache License 2.0
@Test
public void testToFromProtoUnbounded() throws Exception {
  assumeThat(source, instanceOf(UnboundedSource.class));
  UnboundedSource<?, ?> unboundedSource = (UnboundedSource<?, ?>) this.source;
  Read.Unbounded<?> unboundedRead = Read.from(unboundedSource);
  SdkComponents components = SdkComponents.create();
  // No environment set for unbounded sources
  ReadPayload payload = ReadTranslation.toProto(unboundedRead, components);
  assertThat(payload.getIsBounded(), equalTo(RunnerApi.IsBounded.Enum.UNBOUNDED));
  UnboundedSource<?, ?> deserializedSource = ReadTranslation.unboundedSourceFromProto(payload);
  assertThat(deserializedSource, equalTo(source));
}
 
Example #23
Source File: ReaderCache.java    From beam with Apache License 2.0
/** Cache the reader for a minute. It will be closed if it is not acquired within a minute. */
void cacheReader(
    String computationId,
    ByteString splitId,
    long cacheToken,
    UnboundedSource.UnboundedReader<?> reader) {
  CacheEntry existing =
      cache
          .asMap()
          .putIfAbsent(KV.of(computationId, splitId), new CacheEntry(reader, cacheToken));
  Preconditions.checkState(existing == null, "Overwriting existing readers is not allowed");
  cache.cleanUp();
}
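Paired with acquireReader() from Example #3, the intended lifecycle is acquire, read, re-cache: a worker removes the reader from the cache for the duration of a unit of work and puts it back afterwards. The sketch below illustrates that cycle; readerCache, computationId, splitId, cacheToken, source, options, and checkpoint are hypothetical placeholders.

// Sketch of the acquire/read/cache cycle around one unit of work.
UnboundedSource.UnboundedReader<?> reader =
    readerCache.acquireReader(computationId, splitId, cacheToken); // null on a cache miss
if (reader == null) {
  reader = source.createReader(options, checkpoint); // cold start, possibly from a checkpoint
}
// ... read records from the reader ...
readerCache.cacheReader(computationId, splitId, cacheToken, reader); // hand it back for reuse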
 
Example #24
Source File: SourceDStream.java    From beam with Apache License 2.0
SourceDStream(
    StreamingContext ssc,
    UnboundedSource<T, CheckpointMarkT> unboundedSource,
    SerializablePipelineOptions options,
    Long boundMaxRecords) {
  super(ssc, JavaSparkContext$.MODULE$.fakeClassTag());
  this.unboundedSource = unboundedSource;
  this.options = options;

  SparkPipelineOptions sparkOptions = options.get().as(SparkPipelineOptions.class);

  // Reader cache expiration interval. 50% of batch interval is added to accommodate latency.
  this.readerCacheInterval = 1.5 * sparkOptions.getBatchIntervalMillis();

  this.boundReadDuration =
      boundReadDuration(
          sparkOptions.getReadTimePercentage(), sparkOptions.getMinReadTimeMillis());
  // set initial parallelism once.
  this.initialParallelism = ssc().sparkContext().defaultParallelism();
  checkArgument(this.initialParallelism > 0, "Number of partitions must be greater than zero.");

  this.boundMaxRecords = boundMaxRecords;

  try {
    this.numPartitions = createMicrobatchSource().split(sparkOptions).size();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #25
Source File: UnboundedSourceSystem.java    From beam with Apache License 2.0
private static <T, CheckpointMarkT extends CheckpointMark>
    UnboundedSource<T, CheckpointMarkT> getUnboundedSource(Config config) {
  @SuppressWarnings("unchecked")
  final UnboundedSource<T, CheckpointMarkT> source =
      Base64Serializer.deserializeUnchecked(config.get("source"), UnboundedSource.class);
  return source;
}
 
Example #26
Source File: KafkaIOTest.java    From beam with Apache License 2.0
@Test
public void testUnboundedSourceCheckpointMark() throws Exception {
  int numElements = 85; // 85 to make sure some partitions have more records than others.

  // create a single split:
  UnboundedSource<KafkaRecord<Integer, Long>, KafkaCheckpointMark> source =
      mkKafkaReadTransform(numElements, new ValueAsTimestampFn())
          .makeSource()
          .split(1, PipelineOptionsFactory.create())
          .get(0);

  UnboundedReader<KafkaRecord<Integer, Long>> reader = source.createReader(null, null);
  final int numToSkip = 20; // one from each partition.

  // advance numToSkip elements
  for (int i = 0; i < numToSkip; ++i) {
    advanceOnce(reader, i > 0);
  }

  // Confirm that we get the expected element in sequence before checkpointing.
  assertEquals(numToSkip - 1, (long) reader.getCurrent().getKV().getValue());
  assertEquals(numToSkip - 1, reader.getCurrentTimestamp().getMillis());

  // Checkpoint and restart, and confirm that the source continues correctly.
  KafkaCheckpointMark mark =
      CoderUtils.clone(
          source.getCheckpointMarkCoder(), (KafkaCheckpointMark) reader.getCheckpointMark());
  reader = source.createReader(null, mark);

  // Confirm that we get the next elements in sequence.
  // This also confirms that the reader interleaves records from the partitions.

  for (int i = numToSkip; i < numElements; i++) {
    advanceOnce(reader, i > numToSkip);
    assertEquals(i, (long) reader.getCurrent().getKV().getValue());
    assertEquals(i, reader.getCurrentTimestamp().getMillis());
  }
}
 
Example #27
Source File: KafkaIOTest.java    From beam with Apache License 2.0
@Test
public void testUnboundedSourceSplits() throws Exception {

  int numElements = 1000;
  int numSplits = 10;

  // Coders must be specified explicitly here due to the way the transform
  // is used in the test.
  UnboundedSource<KafkaRecord<Integer, Long>, ?> initial =
      mkKafkaReadTransform(numElements, null)
          .withKeyDeserializerAndCoder(IntegerDeserializer.class, BigEndianIntegerCoder.of())
          .withValueDeserializerAndCoder(LongDeserializer.class, BigEndianLongCoder.of())
          .makeSource();

  List<? extends UnboundedSource<KafkaRecord<Integer, Long>, ?>> splits =
      initial.split(numSplits, p.getOptions());
  assertEquals("Expected exact splitting", numSplits, splits.size());

  long elementsPerSplit = numElements / numSplits;
  assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits);
  PCollectionList<Long> pcollections = PCollectionList.empty(p);
  for (int i = 0; i < splits.size(); ++i) {
    pcollections =
        pcollections.and(
            p.apply("split" + i, Read.from(splits.get(i)).withMaxNumRecords(elementsPerSplit))
                .apply("Remove Metadata " + i, ParDo.of(new RemoveKafkaMetadata<>()))
                .apply("collection " + i, Values.create()));
  }
  PCollection<Long> input = pcollections.apply(Flatten.pCollections());

  addCountingAsserts(input, numElements);
  p.run();
}
 
Example #28
Source File: UnboundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0
@Override
public UnboundedSource.UnboundedReader<T> createReader(
    PipelineOptions options, @Nullable TestCheckpointMark checkpointMark) {
  checkState(
      checkpointMark == null || checkpointMark.decoded,
      "Cannot resume from a checkpoint that has not been decoded");
  readerCreatedCount++;
  return new TestUnboundedReader(elems, checkpointMark == null ? -1 : checkpointMark.index);
}
 
Example #29
Source File: UnboundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0
@Test
public void generatesInitialSplits() throws Exception {
  when(context.createRootBundle()).thenAnswer(invocation -> bundleFactory.createRootBundle());

  int numSplits = 5;
  Collection<CommittedBundle<?>> initialInputs =
      new UnboundedReadEvaluatorFactory.InputProvider(context, options)
          .getInitialInputs(graph.getProducer(longs), numSplits);
  // CountingSource.unbounded has very good splitting behavior
  assertThat(initialInputs, hasSize(numSplits));

  int readPerSplit = 100;
  int totalSize = numSplits * readPerSplit;
  Set<Long> expectedOutputs =
      ContiguousSet.create(Range.closedOpen(0L, (long) totalSize), DiscreteDomain.longs());

  Collection<Long> readItems = new ArrayList<>(totalSize);
  for (CommittedBundle<?> initialInput : initialInputs) {
    CommittedBundle<UnboundedSourceShard<Long, ?>> shardBundle =
        (CommittedBundle<UnboundedSourceShard<Long, ?>>) initialInput;
    WindowedValue<UnboundedSourceShard<Long, ?>> shard =
        Iterables.getOnlyElement(shardBundle.getElements());
    assertThat(shard.getTimestamp(), equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
    assertThat(shard.getWindows(), Matchers.contains(GlobalWindow.INSTANCE));
    UnboundedSource<Long, ?> shardSource = shard.getValue().getSource();
    readItems.addAll(
        SourceTestUtils.readNItemsFromUnstartedReader(
            shardSource.createReader(
                PipelineOptionsFactory.create(), null /* No starting checkpoint */),
            readPerSplit));
  }
  assertThat(readItems, containsInAnyOrder(expectedOutputs.toArray(new Long[0])));
}
 
Example #30
Source File: MyUnboundedSource.java    From hazelcast-jet-demos with Apache License 2.0
/**
 * <p>Returns a source list containing only this source, since splits are ignored.</p>
 *
 * @param arg0 Number of splits, ignored
 * @param arg1 Pipeline options, ignored
 */
@Override
public List<? extends UnboundedSource<String, CheckpointMark>> split(int arg0, PipelineOptions arg1)
    throws Exception {
  List<UnboundedSource<String, CheckpointMark>> result = new ArrayList<>(Arrays.asList(this));
  return Collections.unmodifiableList(result);
}