Java Code Examples for org.apache.beam.sdk.io.BoundedSource#BoundedReader

The following examples show how to use org.apache.beam.sdk.io.BoundedSource#BoundedReader. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: TextSourceTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the incoming file with a {@code TextSource} and emits every record the reader produces.
 *
 * <p>The reader is opened in a try-with-resources statement so it is closed whether reading
 * finishes normally or fails part-way through.
 *
 * @param c the process context supplying the file to read and receiving the output records
 * @throws RuntimeException wrapping the underlying {@link IOException} if the file cannot be read
 */
@ProcessElement
public void processElement(ProcessContext c) {
  ReadableFile file = c.element();

  // Create a TextSource, passing null as the delimiter to use the default
  // delimiters ('\n', '\r', or '\r\n').
  TextSource textSource =
      new TextSource(file.getMetadata(), 0, file.getMetadata().sizeBytes(), null);
  try (BoundedSource.BoundedReader<String> reader =
      textSource
          .createForSubrangeOfFile(file.getMetadata(), 0, file.getMetadata().sizeBytes())
          .createReader(c.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      c.output(reader.getCurrent());
    }
  } catch (IOException e) {
    // Keep the original exception as the cause so the root failure is not lost.
    throw new RuntimeException(
        "Unable to readFile: " + file.getMetadata().resourceId().toString(), e);
  }
}
 
Example 2
Source File: AvroByteReader.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an iterator over the Avro data, reading either the whole input or only the configured
 * subrange of the file.
 *
 * @return an iterator backed by a freshly created reader
 * @throws IOException if the reader cannot be created
 */
@Override
public AvroByteFileIterator iterator() throws IOException {
  final boolean readsWholeInput = startPosition == 0 && endPosition == Long.MAX_VALUE;
  final BoundedSource.BoundedReader<ByteBuffer> byteReader;
  if (!readsWholeInput) {
    // Only a subrange of the file was requested.
    byteReader =
        avroSource
            .createForSubrangeOfFile(
                FileSystems.matchSingleFileSpec(filename), startPosition, endPosition)
            .createReader(options);
  } else {
    // The full file (or collection of files) was requested.
    byteReader = avroSource.createReader(options);
  }
  return new AvroByteFileIterator((AvroReader<ByteBuffer>) byteReader);
}
 
Example 3
Source File: AvroTableFileAsMutations.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the byte range described by the incoming {@code FileShard} from its Avro file and emits
 * each record as a {@code Mutation}.
 *
 * <p>Records are converted with an {@code AvroRecordConverter} built for the shard's table, which
 * is looked up in the DDL side input. The reader is opened in a try-with-resources statement so
 * it is closed whether reading finishes normally or fails part-way through.
 *
 * @param c the process context supplying the shard and side input and receiving the mutations
 * @throws RuntimeException wrapping the underlying {@link IOException} if reading fails
 */
@ProcessElement
public void processElement(ProcessContext c) {
  FileShard f = c.element();

  Ddl ddl = c.sideInput(ddlView);
  Table table = ddl.table(f.getTableName());
  SerializableFunction<GenericRecord, Mutation> parseFn = new AvroRecordConverter(table);
  AvroSource<Mutation> source =
      AvroSource.from(f.getFile().getMetadata().resourceId().toString())
          .withParseFn(parseFn, SerializableCoder.of(Mutation.class));
  try (BoundedSource.BoundedReader<Mutation> reader =
      source
          .createForSubrangeOfFile(
              f.getFile().getMetadata(), f.getRange().getFrom(), f.getRange().getTo())
          .createReader(c.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      c.output(reader.getCurrent());
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 4
Source File: SyntheticBoundedSourceTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that with {@code ProgressShape.LINEAR} the reader's fraction consumed starts at 0,
 * strictly increases with every record, and ends at 1.
 *
 * <p>The reader is opened in a try-with-resources statement so it is closed even when an
 * assertion fails.
 */
@Test
public void testIncreasingProgress() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  testSourceOptions.progressShape = ProgressShape.LINEAR;
  SyntheticBoundedSource source = new SyntheticBoundedSource(testSourceOptions);
  try (BoundedSource.BoundedReader<KV<byte[], byte[]>> reader =
      source.createReader(options)) {
    // Reader starts at 0.0 progress.
    assertEquals(0, reader.getFractionConsumed(), 1e-5);
    // Set the lastFractionConsumed < 0.0 so that we can use strict inequality in the below loop.
    double lastFractionConsumed = -1.0;
    for (boolean more = reader.start(); more; more = reader.advance()) {
      assertTrue(reader.getFractionConsumed() > lastFractionConsumed);
      lastFractionConsumed = reader.getFractionConsumed();
    }
    assertEquals(1, reader.getFractionConsumed(), 1e-5);
  }
}
 
Example 5
Source File: SyntheticBoundedSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that with {@code ProgressShape.LINEAR_REGRESSING} the reader's fraction consumed never
 * increases from one record to the next.
 *
 * <p>The reader is opened in a try-with-resources statement so it is closed even when an
 * assertion fails.
 */
@Test
public void testRegressingProgress() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  testSourceOptions.progressShape = ProgressShape.LINEAR_REGRESSING;
  SyntheticBoundedSource source = new SyntheticBoundedSource(testSourceOptions);
  try (BoundedSource.BoundedReader<KV<byte[], byte[]>> reader =
      source.createReader(options)) {
    // Seed the comparison with the progress reported before the reader is started.
    double lastFractionConsumed = reader.getFractionConsumed();
    for (boolean more = reader.start(); more; more = reader.advance()) {
      assertTrue(reader.getFractionConsumed() <= lastFractionConsumed);
      lastFractionConsumed = reader.getFractionConsumed();
    }
  }
}
 
Example 6
Source File: SourceRDD.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a reader over the source held by the given partition, converting a checked
 * {@link IOException} into a {@link RuntimeException}.
 *
 * @param partition the partition whose source should be read
 * @return a new reader for the partition's source
 */
private BoundedSource.BoundedReader<T> createReader(SourcePartition<T> partition) {
  final BoundedSource<T> partitionSource = (BoundedSource<T>) partition.source;
  try {
    return partitionSource.createReader(options.get());
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to create reader from a BoundedSource.", ioe);
  }
}
 
Example 7
Source File: SourceTestUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that reading the given unstarted {@code Reader} yields the same records, in any order,
 * as reading the {@code Source} that the reader reports as its current source.
 *
 * <p>Records are compared as structural values built with the source's output coder.
 */
public static <T> void assertUnstartedReaderReadsSameAsItsSource(
    BoundedSource.BoundedReader<T> reader, PipelineOptions options) throws Exception {
  final Coder<T> outputCoder = reader.getCurrentSource().getOutputCoder();

  final List<T> recordsFromReader = readFromUnstartedReader(reader);
  final List<T> recordsFromSource = readFromSource(reader.getCurrentSource(), options);

  final List<ReadableStructuralValue<T>> structuralFromReader =
      createStructuralValues(outputCoder, recordsFromReader);
  final List<ReadableStructuralValue<T>> structuralFromSource =
      createStructuralValues(outputCoder, recordsFromSource);

  final Object[] expectedValues = structuralFromReader.toArray();
  assertThat(structuralFromSource, containsInAnyOrder(expectedValues));
}
 
Example 8
Source File: SourceTestUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every element of the given {@link BoundedSource} through a newly created reader, which
 * is closed before the elements are returned.
 */
public static <T> List<T> readFromSource(BoundedSource<T> source, PipelineOptions options)
    throws IOException {
  final List<T> elements;
  try (BoundedSource.BoundedReader<T> unstartedReader = source.createReader(options)) {
    elements = readFromUnstartedReader(unstartedReader);
  }
  return elements;
}
 
Example 9
Source File: BeamBoundedSourceVertex.java    From nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the bounded source to the end and returns everything it produced, in reader order.
 *
 * <p>The reader is created with {@code null} pipeline options and is closed before returning.
 *
 * @return all elements read from the source
 * @throws Exception if creating, advancing, or closing the reader fails
 */
@Override
public Iterable<T> read() throws Exception {
  final ArrayList<T> collected = new ArrayList<>();
  try (BoundedSource.BoundedReader<T> sourceReader = boundedSource.createReader(null)) {
    boolean hasElement = sourceReader.start();
    while (hasElement) {
      collected.add(sourceReader.getCurrent());
      hasElement = sourceReader.advance();
    }
  }
  return collected;
}
 
Example 10
Source File: Twister2BoundedSource.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a reader over the given source after casting it to a {@code BoundedSource}, converting
 * a checked {@link IOException} into a {@link RuntimeException}.
 *
 * @param partition the source to read
 * @return a new reader for the source
 */
private BoundedSource.BoundedReader<T> createReader(Source<T> partition) {
  final BoundedSource<T> bounded = (BoundedSource<T>) partition;
  try {
    return bounded.createReader(options);
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to create reader from a BoundedSource.", ioe);
  }
}
 
Example 11
Source File: SourceTestUtils.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code numItemsToReadBeforeSplit} items from a fresh reader of {@code source}, attempts
 * {@code splitAtFraction(splitFraction)}, checks the result against {@code expectedOutcome},
 * then reads the remaining items and delegates the final consistency check to
 * {@code verifySingleSplitAtFractionResult}.
 *
 * @param source the source under test
 * @param expectedItems the items passed through to the final verification
 * @param numItemsToReadBeforeSplit how many items to read before attempting the split
 * @param splitFraction the fraction passed to {@code splitAtFraction}
 * @param expectedOutcome whether the split must succeed, must fail, or may do either
 * @param options pipeline options used to create the reader and for verification
 * @return the result produced by {@code verifySingleSplitAtFractionResult}
 * @throws Exception if reading, splitting, or verification fails
 */
private static <T> SourceTestUtils.SplitAtFractionResult assertSplitAtFractionBehaviorImpl(
    BoundedSource<T> source,
    List<T> expectedItems,
    int numItemsToReadBeforeSplit,
    double splitFraction,
    ExpectedSplitOutcome expectedOutcome,
    PipelineOptions options)
    throws Exception {
  try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
    // Remember the reader's source before the split so identity can be compared afterwards.
    BoundedSource<T> originalSource = reader.getCurrentSource();
    List<T> currentItems = readNItemsFromUnstartedReader(reader, numItemsToReadBeforeSplit);
    BoundedSource<T> residual = reader.splitAtFraction(splitFraction);
    // A non-null residual is treated as a successful split.
    if (residual != null) {
      // After a successful split the reader must report a different source object.
      assertFalse(
          String.format(
              "Primary source didn't change after a successful split of %s at %f "
                  + "after reading %d items. "
                  + "Was the source object mutated instead of creating a new one? "
                  + "Source objects MUST be immutable.",
              source, splitFraction, numItemsToReadBeforeSplit),
          reader.getCurrentSource() == originalSource);
      // NOTE(review): the message below talks about the residual equalling the original source,
      // but the check compares the reader's current (primary) source with the residual - confirm
      // which is intended.
      assertFalse(
          String.format(
              "Residual source equal to original source after a successful split of %s at %f "
                  + "after reading %d items. "
                  + "Was the source object mutated instead of creating a new one? "
                  + "Source objects MUST be immutable.",
              source, splitFraction, numItemsToReadBeforeSplit),
          reader.getCurrentSource() == residual);
    }
    // Failure cases are: must succeed but fails; must fail but succeeds.
    switch (expectedOutcome) {
      case MUST_SUCCEED_AND_BE_CONSISTENT:
        assertNotNull(
            "Failed to split reader of source: "
                + source
                + " at "
                + splitFraction
                + " after reading "
                + numItemsToReadBeforeSplit
                + " items",
            residual);
        break;
      case MUST_FAIL:
        assertEquals(null, residual);
        break;
      case MUST_BE_CONSISTENT_IF_SUCCEEDS:
        // Nothing.
        break;
    }
    // Drain the rest of the reader; the flag is true when at least one item was read before the
    // split (presumably it tells the helper the reader has already been started - confirm).
    currentItems.addAll(readRemainingFromReader(reader, numItemsToReadBeforeSplit > 0));
    // The primary source is captured only after the reader has been fully drained.
    BoundedSource<T> primary = reader.getCurrentSource();
    return verifySingleSplitAtFractionResult(
        source,
        expectedItems,
        currentItems,
        primary,
        residual,
        numItemsToReadBeforeSplit,
        splitFraction,
        options);
  }
}
 
Example 12
Source File: SourceTestUtils.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Runs reading and {@code splitAtFraction} against the same reader from two executor tasks and
 * verifies the outcome with {@code verifySingleSplitAtFractionResult}.
 *
 * <p>The reader task reads {@code numItemsToReadBeforeSplitting} items, releases the splitter
 * task via a latch, and then keeps reading the remaining items, so the split request is issued
 * while the reader task may still be reading.
 *
 * @param executor the executor that runs the reader and splitter tasks
 * @param source the source under test
 * @param expectedItems the items passed through to the final verification
 * @param numItemsToReadBeforeSplitting how many items to read before unblocking the splitter
 * @param fraction the fraction passed to {@code splitAtFraction}
 * @param options pipeline options used to create the reader and for verification
 * @return {@code false} if the split returned {@code null}; otherwise whether the verification
 *     result reports more than zero residual items
 * @throws Exception if either task or the verification fails
 */
private static <T> boolean assertSplitAtFractionConcurrent(
    ExecutorService executor,
    BoundedSource<T> source,
    List<T> expectedItems,
    final int numItemsToReadBeforeSplitting,
    final double fraction,
    PipelineOptions options)
    throws Exception {
  @SuppressWarnings("resource") // Closed in readerThread
  final BoundedSource.BoundedReader<T> reader = source.createReader(options);
  // Released by the reader task once the initial items have been read.
  final CountDownLatch unblockSplitter = new CountDownLatch(1);
  Future<List<T>> readerThread =
      executor.submit(
          () -> {
            try {
              List<T> items =
                  readNItemsFromUnstartedReader(reader, numItemsToReadBeforeSplitting);
              // Let the splitter proceed, then continue reading without waiting for it.
              unblockSplitter.countDown();
              items.addAll(readRemainingFromReader(reader, numItemsToReadBeforeSplitting > 0));
              return items;
            } finally {
              reader.close();
            }
          });
  Future<KV<BoundedSource<T>, BoundedSource<T>>> splitterThread =
      executor.submit(
          () -> {
            unblockSplitter.await();
            BoundedSource<T> residual = reader.splitAtFraction(fraction);
            // A null residual is reported to the caller as a null pair.
            if (residual == null) {
              return null;
            }
            // Pair up the reader's current (primary) source with the residual.
            return KV.of(reader.getCurrentSource(), residual);
          });
  // Wait for both tasks; get() rethrows any failure from inside them.
  List<T> currentItems = readerThread.get();
  KV<BoundedSource<T>, BoundedSource<T>> splitSources = splitterThread.get();
  if (splitSources == null) {
    return false;
  }
  SplitAtFractionResult res =
      verifySingleSplitAtFractionResult(
          source,
          expectedItems,
          currentItems,
          splitSources.getKey(),
          splitSources.getValue(),
          numItemsToReadBeforeSplitting,
          fraction,
          options);
  return (res.numResidualItems > 0);
}
 
Example 13
Source File: BoundedReaderWithLimit.java    From components with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the wrapped reader together with the limit and the counter supplied by the caller.
 *
 * @param delegate the reader to wrap
 * @param limit the limit value kept by this instance
 * @param count the counter kept by this instance
 */
private BoundedReaderWithLimit(
        BoundedSource.BoundedReader<T> delegate, int limit, AtomicInteger count) {
    this.count = count;
    this.limit = limit;
    this.delegate = delegate;
}
 
Example 14
Source File: MongoDbGridFSIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code GridFSReader} for this source and its {@code objectIds}.
 *
 * @param options pipeline options (not used by this implementation)
 * @return a new reader over this source
 * @throws IOException declared by the overridden method
 */
@Override
public BoundedSource.BoundedReader<ObjectId> createReader(PipelineOptions options)
    throws IOException {
  final GridFSReader gridFsReader = new GridFSReader(this, objectIds);
  return gridFsReader;
}
 
Example 15
Source File: BoundedReadEvaluatorFactoryTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Resets {@code subrangesCompleted} to a new latch with a count of two and returns a
 * {@code TestReader} that shares that latch.
 *
 * @param options pipeline options (not used by this implementation)
 * @return a new test reader for this source
 * @throws IOException declared by the overridden method
 */
@Override
public BoundedSource.BoundedReader<T> createReader(PipelineOptions options) throws IOException {
  final CountDownLatch freshLatch = new CountDownLatch(2);
  subrangesCompleted = freshLatch;
  return new TestReader<>(this, firstSplitIndex, freshLatch);
}
 
Example 16
Source File: BoundedReaderWithLimit.java    From components with Apache License 2.0 4 votes vote down vote up
/**
 * Static factory that wraps the given reader in a {@code BoundedReaderWithLimit}.
 *
 * @param delegate the reader to wrap
 * @param limit the limit passed to the new instance
 * @param count the counter passed to the new instance
 * @return the newly created wrapper
 */
public static <T, SourceT extends BoundedSource<T>> BoundedReaderWithLimit<T, SourceT> of(
        BoundedSource.BoundedReader<T> delegate, int limit, AtomicInteger count) {
    final BoundedReaderWithLimit<T, SourceT> limitedReader =
            new BoundedReaderWithLimit<>(delegate, limit, count);
    return limitedReader;
}
 
Example 17
Source File: WorkerCustomSources.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Always fails: this source does not provide a reader.
 *
 * @param options pipeline options (ignored)
 * @return never returns normally
 * @throws UnsupportedOperationException on every call
 */
@Override
public BoundedSource.BoundedReader<T> createReader(PipelineOptions options) throws IOException {
  final String message = "SplittableOnlyBoundedSource only supports splitting.";
  throw new UnsupportedOperationException(message);
}
 
Example 18
Source File: WorkerCustomSources.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the given reader in this instance's {@code reader} field.
 *
 * @param reader the bounded reader this instance will use
 */
private BoundedReaderIterator(BoundedSource.BoundedReader<T> reader) {
  this.reader = reader;
}
 
Example 19
Source File: KuduService.java    From beam with Apache License 2.0 2 votes vote down vote up
/**
 * Returns a {@link org.apache.beam.sdk.io.BoundedSource.BoundedReader} that will read from Kudu
 * using the spec from {@link org.apache.beam.sdk.io.kudu.KuduIO.KuduSource}.
 *
 * @param source the Kudu source whose spec the reader should use
 * @return a reader for the given source
 */
BoundedSource.BoundedReader<T> createReader(KuduIO.KuduSource<T> source);
 
Example 20
Source File: FileSourceBase.java    From components with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for the given split; subclasses supply the implementation.
 *
 * @param serializableSplit the split that the source is processing.
 * @return a reader created for this source.
 * @throws IOException If the reader can't be created.
 */
protected abstract BoundedSource.BoundedReader<KV<K, V>> createReaderForSplit(SerializableSplit serializableSplit)
        throws IOException;