org.apache.beam.sdk.io.BoundedSource Java Examples

The following examples show how to use org.apache.beam.sdk.io.BoundedSource. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HadoopFormatIOReadTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an InputFormat implementing {@link org.apache.hadoop.conf.Configurable
 * Configurable} reports {@code isConfSet} after the source has created its InputFormat instance.
 */
@Test
public void testReadingWithConfigurableInputFormat() throws Exception {
  List<BoundedSource<KV<Text, Employee>>> splits =
      getBoundedSourceList(
          ConfigurableEmployeeInputFormat.class,
          Text.class,
          Employee.class,
          WritableCoder.of(Text.class),
          AvroCoder.of(Employee.class));
  for (BoundedSource<KV<Text, Employee>> split : splits) {
    // The concrete source type is needed to reach createInputFormatInstance()/getInputFormat().
    HadoopInputFormatBoundedSource<Text, Employee> hadoopSource =
        (HadoopInputFormatBoundedSource<Text, Employee>) split;
    hadoopSource.createInputFormatInstance();
    ConfigurableEmployeeInputFormat configuredFormat =
        (ConfigurableEmployeeInputFormat) hadoopSource.getInputFormat();
    assertTrue(configuredFormat.isConfSet);
  }
}
 
Example #2
Source File: AvroTableFileAsMutations.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the byte range described by the incoming {@code FileShard} from its Avro file and emits
 * every record, converted to a {@code Mutation} by an {@code AvroRecordConverter} built for the
 * shard's table.
 *
 * @param c the process context supplying the shard, the DDL side input and the pipeline options
 * @throws RuntimeException wrapping any {@link IOException} raised while reading or closing the
 *     reader
 */
@ProcessElement
public void processElement(ProcessContext c) {
  FileShard f = c.element();

  Ddl ddl = c.sideInput(ddlView);
  Table table = ddl.table(f.getTableName());
  SerializableFunction<GenericRecord, Mutation> parseFn = new AvroRecordConverter(table);
  AvroSource<Mutation> source =
      AvroSource.from(f.getFile().getMetadata().resourceId().toString())
          .withParseFn(parseFn, SerializableCoder.of(Mutation.class));
  // try-with-resources closes the reader on both the normal and the failure path; the
  // previous version never closed it.
  try (BoundedSource.BoundedReader<Mutation> reader =
      source
          .createForSubrangeOfFile(
              f.getFile().getMetadata(), f.getRange().getFrom(), f.getRange().getTo())
          .createReader(c.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      c.output(reader.getCurrent());
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #3
Source File: TextSourceTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the whole incoming file through a {@code TextSource} and emits each element the reader
 * produces.
 *
 * @param c the process context supplying the file and the pipeline options
 * @throws RuntimeException if reading (or closing the reader) fails; the original {@link
 *     IOException} is attached as the cause
 */
@ProcessElement
public void processElement(ProcessContext c) {
  ReadableFile file = c.element();

  // Create a TextSource, passing null as the delimiter to use the default
  // delimiters ('\n', '\r', or '\r\n').
  TextSource textSource =
      new TextSource(file.getMetadata(), 0, file.getMetadata().sizeBytes(), null);
  // try-with-resources releases the reader even when reading fails part-way through.
  try (BoundedSource.BoundedReader<String> reader =
      textSource
          .createForSubrangeOfFile(file.getMetadata(), 0, file.getMetadata().sizeBytes())
          .createReader(c.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      c.output(reader.getCurrent());
    }
  } catch (IOException e) {
    // Keep the underlying failure as the cause so the stack trace is not lost.
    throw new RuntimeException(
        "Unable to readFile: " + file.getMetadata().resourceId().toString(), e);
  }
}
 
Example #4
Source File: SourceTestUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the outcome of calling {@code splitAtFraction(splitFraction)} on the {@code source}'s
 * reader once {@code numItemsToReadBeforeSplit} items have been read: the split must either fail
 * or succeed in a way that is consistent according to {@link
 * #assertSplitAtFractionSucceedsAndConsistent}.
 *
 * <p>The full contents of {@code source} are read first and handed to the implementation method
 * together with the remaining arguments.
 *
 * @return the SplitAtFractionResult produced by the implementation method
 */
public static <T> SplitAtFractionResult assertSplitAtFractionBehavior(
    BoundedSource<T> source,
    int numItemsToReadBeforeSplit,
    double splitFraction,
    ExpectedSplitOutcome expectedOutcome,
    PipelineOptions options)
    throws Exception {
  List<T> expectedItems = readFromSource(source, options);
  return assertSplitAtFractionBehaviorImpl(
      source, expectedItems, numItemsToReadBeforeSplit, splitFraction, expectedOutcome, options);
}
 
Example #5
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code boundedSource} through a {@code BoundedToUnboundedSourceAdapter}, finalizing a
 * checkpoint after every ninth element, and then checks that the final checkpoint carries no
 * residual elements and that the elements read match {@code expectedElements} (compared as sets,
 * plus an equal count).
 */
private <T> void testBoundedToUnboundedSourceAdapterCheckpoint(
    BoundedSource<T> boundedSource, List<T> expectedElements) throws Exception {
  BoundedToUnboundedSourceAdapter<T> adapter =
      new BoundedToUnboundedSourceAdapter<>(boundedSource);

  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  BoundedToUnboundedSourceAdapter<T>.Reader adapterReader =
      adapter.createReader(pipelineOptions, null);

  List<T> readSoFar = Lists.newArrayList();
  boolean available = adapterReader.start();
  while (available) {
    readSoFar.add(adapterReader.getCurrent());
    // Take and finalize a checkpoint each time another 9 elements have been read.
    if (readSoFar.size() % 9 == 0) {
      Checkpoint<T> intermediate = adapterReader.getCheckpointMark();
      intermediate.finalizeCheckpoint();
    }
    available = adapterReader.advance();
  }

  Checkpoint<T> finalCheckpoint = adapterReader.getCheckpointMark();
  assertTrue(
      finalCheckpoint.getResidualElements() == null
          || finalCheckpoint.getResidualElements().isEmpty());

  assertEquals(expectedElements.size(), readSoFar.size());
  assertEquals(Sets.newHashSet(expectedElements), Sets.newHashSet(readSoFar));
}
 
Example #6
Source File: SourceTestUtilsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps a counting source with {@code SourceTestUtils.toUnsplittableSource} and checks that it
 * splits into exactly itself, refuses {@code splitAtFraction}, and still yields all 100 elements
 * of the base source.
 */
@Test
public void testToUnsplittableSource() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  BoundedSource<Long> countingSource = CountingSource.upTo(100);
  BoundedSource<Long> wrapped = SourceTestUtils.toUnsplittableSource(countingSource);

  // Splitting must hand back the wrapped source itself as the only split.
  List<?> pieces = wrapped.split(1, pipelineOptions);
  assertEquals(1, pieces.size());
  assertEquals(wrapped, pieces.get(0));

  BoundedReader<Long> reader = wrapped.createReader(pipelineOptions);
  assertEquals(0, reader.getFractionConsumed(), 1e-15);

  Set<Long> expected =
      Sets.newHashSet(SourceTestUtils.readFromSource(countingSource, pipelineOptions));
  Set<Long> actual = Sets.newHashSet();

  // Read 40 items, attempt a split (which must be refused), then drain the rest.
  actual.addAll(SourceTestUtils.readNItemsFromUnstartedReader(reader, 40));
  assertNull(reader.splitAtFraction(0.5));
  actual.addAll(SourceTestUtils.readRemainingFromReader(reader, true /* started */));
  assertEquals(1, reader.getFractionConsumed(), 1e-15);

  assertEquals(100, actual.size());
  assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual));
}
 
Example #7
Source File: BoundedSourceRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the read loop twice on one runner (sources counting up to 2, then up to 1) and checks that
 * both registered consumers received the elements of both sources in order.
 */
@Test
public void testRunReadLoopWithMultipleSources() throws Exception {
  List<WindowedValue<Long>> firstConsumerOutput = new ArrayList<>();
  List<WindowedValue<Long>> secondConsumerOutput = new ArrayList<>();
  Collection<FnDataReceiver<WindowedValue<Long>>> receivers =
      ImmutableList.of(firstConsumerOutput::add, secondConsumerOutput::add);

  BoundedSourceRunner<BoundedSource<Long>, Long> sourceRunner =
      new BoundedSourceRunner<>(
          PipelineOptionsFactory.create(), RunnerApi.FunctionSpec.getDefaultInstance(), receivers);

  sourceRunner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(2)));
  sourceRunner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(1)));

  // 0, 1 come from the first source; the trailing 0 comes from the second.
  assertThat(
      firstConsumerOutput,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(0L)));
  assertThat(
      secondConsumerOutput,
      contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(0L)));
}
 
Example #8
Source File: HadoopFormatIOReadTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Validates that the records emitted in the PCollection are immutable when the InputFormat's
 * {@link org.apache.hadoop.mapreduce.RecordReader RecordReader} returns different objects (i.e.
 * different locations in memory): everything read from all splits must match the reference
 * employee data set, in any order.
 */
@Test
public void testImmutablityOfOutputOfReadIfRecordReaderObjectsAreImmutable() throws Exception {
  List<BoundedSource<KV<Text, Employee>>> splits =
      getBoundedSourceList(
          EmployeeInputFormat.class,
          Text.class,
          Employee.class,
          WritableCoder.of(Text.class),
          AvroCoder.of(Employee.class));

  // Gather the records of every split into one list.
  List<KV<Text, Employee>> allRecords = new ArrayList<>();
  for (BoundedSource<KV<Text, Employee>> split : splits) {
    allRecords.addAll(SourceTestUtils.readFromSource(split, p.getOptions()));
  }

  List<KV<Text, Employee>> expectedRecords = TestEmployeeDataSet.getEmployeeData();
  assertThat(allRecords, containsInAnyOrder(expectedRecords.toArray()));
}
 
Example #9
Source File: XmlSourceTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the tiny XML fixture to a temp file, reads it back through an XmlIO source with a
 * minimum bundle size of 1024, and expects the three trains Thomas, Henry and James.
 */
@Test
public void testReadXMLTiny() throws IOException {
  File xmlFile = tempFolder.newFile("trainXMLTiny");
  Files.write(xmlFile.toPath(), tinyXML.getBytes(StandardCharsets.UTF_8));

  BoundedSource<Train> trainSource =
      XmlIO.<Train>read()
          .from(xmlFile.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .withMinBundleSize(1024)
          .createSource();

  List<Train> expectedTrains =
      ImmutableList.of(
          new Train("Thomas", Train.TRAIN_NUMBER_UNDEFINED, null, null),
          new Train("Henry", Train.TRAIN_NUMBER_UNDEFINED, null, null),
          new Train("James", Train.TRAIN_NUMBER_UNDEFINED, null, null));

  // Compare by string form, ignoring order.
  Object[] actualStrings =
      trainsToStrings(readEverythingFromReader(trainSource.createReader(null))).toArray();
  assertThat(trainsToStrings(expectedTrains), containsInAnyOrder(actualStrings));
}
 
Example #10
Source File: FileSourceBase.java    From components with Apache License 2.0 6 votes vote down vote up
/**
 * Re-implementation of the base class method that builds the per-split sources through the
 * factory method {@code createSourceForSplit}.
 *
 * <p>A source that already carries a {@code serializableSplit} is returned as its own single
 * bundle. Otherwise one source is created per computed input split; when {@code limit} is
 * non-negative the split size is raised to at least 10 MiB.
 */
@Override
protected List<? extends BoundedSource<KV<K, V>>> doAsSplitIntoBundles(long desiredBundleSizeBytes, PipelineOptions options)
        throws Exception {
    // Already bound to one split: nothing further to divide.
    if (serializableSplit != null) {
        return ImmutableList.of(this);
    }

    long splitSize;
    if (limit >= 0) {
        splitSize = Math.max(desiredBundleSizeBytes, 10 * 1024 * 1024);
    } else {
        splitSize = desiredBundleSizeBytes;
    }

    Function<InputSplit, BoundedSource<KV<K, V>>> toSource = new Function<InputSplit, BoundedSource<KV<K, V>>>() {

        @Override
        public BoundedSource<KV<K, V>> apply(@Nullable InputSplit split) {
            return createSourceForSplit(new SerializableSplit(split));
        }
    };
    return Lists.transform(computeSplits(splitSize), toSource);
}
 
Example #11
Source File: UnboundedReadFromBoundedSource.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reader from optional residual elements and an optional residual source.
 *
 * @param residualElementsList elements carried over for this reader, may be {@code null}
 * @param residualSource source still to be read, may be {@code null}
 * @param options pipeline options; must not be {@code null}
 */
Reader(
    @Nullable List<TimestampedValue<T>> residualElementsList,
    @Nullable BoundedSource<T> residualSource,
    PipelineOptions options) {
  // NOTE(review): init(...) receives options before the null check below runs — confirm that
  // init tolerates a null value, or that this ordering is intentional.
  init(residualElementsList, residualSource, options);
  this.options = checkNotNull(options, "options");
  // A freshly constructed reader is not finished.
  this.done = false;
}
 
Example #12
Source File: TCompBoundedSourceAdapter.java    From components with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the wrapped component source into bundles and wraps each resulting source in a
 * {@code TCompBoundedSourceAdapter}.
 *
 * @param desiredBundleSizeBytes bundle size forwarded to the wrapped source's
 *     {@code splitIntoBundles}
 * @param options not used here; {@code null} is passed as the second argument of
 *     {@code splitIntoBundles}
 * @return one adapter per source returned by the wrapped source, in the same order
 * @throws Exception if the wrapped source fails to split
 */
@Override
public List<? extends BoundedSource<IndexedRecord>> split(long desiredBundleSizeBytes, PipelineOptions options)
        throws Exception {
    List<? extends org.talend.components.api.component.runtime.BoundedSource> boundedSources = tCompSource
            .splitIntoBundles(desiredBundleSizeBytes, null);
    // Diamond instead of a raw ArrayList avoids an unchecked conversion; the final size is known,
    // so the list is presized.
    List<TCompBoundedSourceAdapter> sources = new ArrayList<>(boundedSources.size());
    for (org.talend.components.api.component.runtime.BoundedSource boundedSource : boundedSources) {
        sources.add(new TCompBoundedSourceAdapter(boundedSource));
    }
    return sources;
}
 
Example #13
Source File: CreateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits a CreateSource holding five {@code null} values (VoidCoder) with a desired bundle size
 * of 3 and checks the splits against the unsplit source.
 */
@Test
public void testSourceSplitVoid() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  CreateSource<Void> voidSource =
      CreateSource.fromIterable(Lists.newArrayList(null, null, null, null, null), VoidCoder.of());
  List<? extends BoundedSource<Void>> pieces = voidSource.split(3, pipelineOptions);
  SourceTestUtils.assertSourcesEqualReferenceSource(voidSource, pieces, pipelineOptions);
}
 
Example #14
Source File: CreateTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits a CreateSource of the integers 1..8 with a desired bundle size of 12, expects three
 * splits, and checks the splits against the unsplit source.
 */
@Test
public void testSourceSplit() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  CreateSource<Integer> intSource =
      CreateSource.fromIterable(
          ImmutableList.of(1, 2, 3, 4, 5, 6, 7, 8), BigEndianIntegerCoder.of());
  List<? extends BoundedSource<Integer>> pieces = intSource.split(12, pipelineOptions);
  assertThat(pieces, hasSize(3));
  SourceTestUtils.assertSourcesEqualReferenceSource(intSource, pieces, pipelineOptions);
}
 
Example #15
Source File: ReadSourceTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a bounded read: wraps the transform's source in a {@code Twister2BoundedSource},
 * creates a source TSet from it with the configured parallelism, and registers that TSet as the
 * data set of the transform's output collection.
 */
@Override
public void translateNode(Read.Bounded<T> transform, Twister2BatchTranslationContext context) {
  BoundedSource<T> beamSource = transform.getSource();
  Twister2BoundedSource<T> wrappedSource =
      new Twister2BoundedSource<>(beamSource, context, context.getOptions());

  // createSource is reached through the batch flavour of the environment.
  BatchTSetEnvironment batchEnv = (BatchTSetEnvironment) context.getEnvironment();
  SourceTSet<WindowedValue<T>> readTSet =
      batchEnv.createSource(wrappedSource, context.getOptions().getParallelism());

  PCollection<T> outputCollection = context.getOutput(transform);
  context.setOutputDataSet(outputCollection, readTSet);
}
 
Example #16
Source File: SourceTestUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code splitAtFraction(splitFraction)} on the {@code source}'s reader fails once
 * {@code numItemsToReadBeforeSplit} items have been read.
 *
 * <p>Delegates to {@code assertSplitAtFractionBehavior} with {@code
 * ExpectedSplitOutcome.MUST_FAIL}; the delegate's result is discarded.
 */
public static <T> void assertSplitAtFractionFails(
    BoundedSource<T> source,
    int numItemsToReadBeforeSplit,
    double splitFraction,
    PipelineOptions options)
    throws Exception {
  final ExpectedSplitOutcome requiredOutcome = ExpectedSplitOutcome.MUST_FAIL;
  assertSplitAtFractionBehavior(
      source, numItemsToReadBeforeSplit, splitFraction, requiredOutcome, options);
}
 
Example #17
Source File: HadoopFormatIOReadTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a test HadoopInputFormatBoundedSource for the given InputFormat, key/value classes and
 * coders, and returns the sources obtained by splitting it with a desired bundle size of 0.
 */
private <K, V> List<BoundedSource<KV<K, V>>> getBoundedSourceList(
    Class<?> inputFormatClass,
    Class<K> inputFormatKeyClass,
    Class<V> inputFormatValueClass,
    Coder<K> keyCoder,
    Coder<V> valueCoder)
    throws Exception {
  HadoopInputFormatBoundedSource<K, V> hifSource =
      getTestHIFSource(
          inputFormatClass, inputFormatKeyClass, inputFormatValueClass, keyCoder, valueCoder);
  List<BoundedSource<KV<K, V>>> splits = hifSource.split(0, p.getOptions());
  return splits;
}
 
Example #18
Source File: WorkerCustomSourcesTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns two splits: one named "goodBundle" without an error message, and one named "badBundle"
 * carrying the error message "intentionally invalid".
 *
 * @throws IllegalStateException if this source itself has an error message
 */
@Override
public List<? extends BoundedSource<Integer>> split(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  Preconditions.checkState(errorMessage == null, "Unexpected invalid source");
  SourceProducingInvalidSplits validSplit = new SourceProducingInvalidSplits("goodBundle", null);
  SourceProducingInvalidSplits invalidSplit =
      new SourceProducingInvalidSplits("badBundle", "intentionally invalid");
  return Arrays.asList(validSplit, invalidSplit);
}
 
Example #19
Source File: WorkItemStatusClientTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Populates a work item status from a bounded source split and checks that the dynamic source
 * split is set to the converted split while the stop position stays unset.
 */
@Test
public void populateSplitResultCustomReader() throws Exception {
  WorkItemStatus workItemStatus = new WorkItemStatus();
  statusClient.setWorker(worker, executionContext);

  BoundedSource<Integer> primarySource = new DummyBoundedSource(5);
  BoundedSource<Integer> residualSource = new DummyBoundedSource(10);
  BoundedSourceSplit<Integer> sourceSplit =
      new BoundedSourceSplit<>(primarySource, residualSource);

  statusClient.populateSplitResult(workItemStatus, sourceSplit);

  assertThat(
      workItemStatus.getDynamicSourceSplit(),
      equalTo(WorkerCustomSources.toSourceSplit(sourceSplit)));
  assertThat(workItemStatus.getStopPosition(), nullValue());
}
 
Example #20
Source File: XmlSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the single-byte "all features" XML fixture to a temp file and runs the exhaustive
 * split-at-fraction assertion on an XmlIO source reading it.
 */
@Test
public void testSplitAtFractionExhaustiveSingleByte() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();

  File xmlFile = tempFolder.newFile("trainXMLSmall");
  byte[] xmlBytes = trainXMLWithAllFeaturesSingleByte.getBytes(StandardCharsets.UTF_8);
  Files.write(xmlFile.toPath(), xmlBytes);

  BoundedSource<Train> trainSource =
      XmlIO.<Train>read()
          .from(xmlFile.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .createSource();

  assertSplitAtFractionExhaustive(trainSource, pipelineOptions);
}
 
Example #21
Source File: BigQueryIOStorageReadTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that splitting a storage table source yields no sources when the (mocked) storage
 * client answers the expected read-session request with an empty read session.
 */
@Test
public void testTableSourceInitialSplit_EmptyTable() throws Exception {
  fakeDatasetService.createDataset("foo.com:project", "dataset", "", "", null);
  TableReference emptyTableRef = BigQueryHelpers.parseTableSpec("foo.com:project:dataset.table");

  // Register a table that reports 1024 * 1024 bytes and has an empty schema.
  Table emptyTable =
      new Table()
          .setTableReference(emptyTableRef)
          .setNumBytes(1024L * 1024L)
          .setSchema(new TableSchema());
  fakeDatasetService.createTable(emptyTable);

  // The request the source is expected to send when it splits.
  CreateReadSessionRequest sessionRequest =
      CreateReadSessionRequest.newBuilder()
          .setParent("projects/project-id")
          .setTableReference(BigQueryHelpers.toTableRefProto(emptyTableRef))
          .setRequestedStreams(1024)
          .setShardingStrategy(ShardingStrategy.BALANCED)
          .build();

  ReadSession sessionWithoutStreams = ReadSession.newBuilder().build();
  StorageClient storageClientMock = mock(StorageClient.class);
  when(storageClientMock.createReadSession(sessionRequest)).thenReturn(sessionWithoutStreams);

  BigQueryStorageTableSource<TableRow> storageSource =
      BigQueryStorageTableSource.create(
          ValueProvider.StaticValueProvider.of(emptyTableRef),
          null,
          null,
          null,
          new TableRowParser(),
          TableRowJsonCoder.of(),
          new FakeBigQueryServices()
              .withDatasetService(fakeDatasetService)
              .withStorageClient(storageClientMock));

  List<? extends BoundedSource<TableRow>> splits = storageSource.split(1024L, options);
  assertTrue(splits.isEmpty());
}
 
Example #22
Source File: BoundedReadEvaluatorFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the source carried by {@code element} to completion into a new output bundle and, if a
 * residual source was produced while reading, records it as an unprocessed element.
 *
 * @param element the shard whose source is read
 * @throws Exception if reading fails, or (wrapped via {@code UserCodeException}) if obtaining
 *     the residual source fails
 */
@Override
public void processElement(WindowedValue<BoundedSourceShard<OutputT>> element)
    throws Exception {
  BoundedSource<OutputT> source = element.getValue().getSource();
  // The reader is closed automatically when this block exits.
  try (final BoundedReader<OutputT> reader = source.createReader(options)) {
    boolean contentsRemaining = reader.start();
    // Started after reader.start(); the future later yields the residual source (possibly null).
    Future<BoundedSource<OutputT>> residualFuture = startDynamicSplitThread(source, reader);
    UncommittedBundle<OutputT> output = evaluationContext.createBundle(outputPCollection);
    // Emit every remaining element with its timestamp, in the global window.
    while (contentsRemaining) {
      output.add(
          WindowedValue.timestampedValueInGlobalWindow(
              reader.getCurrent(), reader.getCurrentTimestamp()));
      contentsRemaining = reader.advance();
    }
    resultBuilder.addOutput(output);
    try {
      // Blocks until the future completes.
      BoundedSource<OutputT> residual = residualFuture.get();
      if (residual != null) {
        // Hand the residual back as an unprocessed element.
        resultBuilder.addUnprocessedElements(
            element.withValue(BoundedSourceShard.of(residual)));
      }
    } catch (ExecutionException exex) {
      // Un-and-rewrap the exception thrown by attempting to split
      throw UserCodeException.wrap(exex.getCause());
    }
  }
}
 
Example #23
Source File: UnboundedReadFromBoundedSource.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a checkpoint for the work this object has not yet produced.
 *
 * <p>If no reader exists yet, the checkpoint holds only the untouched {@code residualSource}.
 * Otherwise the reader is asked to split off a residual at up to {@code splitAttempts} evenly
 * spaced fractions between the consumed fraction and 1; everything the reader still yields
 * afterwards is drained into the checkpoint's residual element list.
 *
 * @return a checkpoint holding the drained elements (or {@code null} when the reader has not
 *     started) and the residual source ({@code null} when no split succeeded)
 * @throws RuntimeException if draining the reader fails with an {@link IOException}
 */
Checkpoint<T> getCheckpointMark() {
  if (reader == null) {
    // Reader hasn't started, checkpoint the residualSource.
    return new Checkpoint<>(null /* residualElements */, residualSource);
  } else {
    // Part of residualSource are consumed.
    // Splits the residualSource and tracks the new residualElements in current source.
    BoundedSource<T> residualSplit = null;
    Double fractionConsumed = reader.getFractionConsumed();
    if (fractionConsumed != null && 0 <= fractionConsumed && fractionConsumed <= 1) {
      double fractionRest = 1 - fractionConsumed;
      int splitAttempts = 8;
      // Bound the loop by splitAttempts (previously a duplicated literal 8) so the number of
      // tries and the step size between candidate fractions cannot drift apart.
      for (int i = 0; i < splitAttempts && residualSplit == null; ++i) {
        double fractionToSplit = fractionConsumed + fractionRest * i / splitAttempts;
        residualSplit = reader.splitAtFraction(fractionToSplit);
      }
    }
    // Whatever remains readable after the split attempt is carried as residual elements.
    List<TimestampedValue<T>> newResidualElements = Lists.newArrayList();
    try {
      while (advance()) {
        newResidualElements.add(
            TimestampedValue.of(reader.getCurrent(), reader.getCurrentTimestamp()));
      }
    } catch (IOException e) {
      throw new RuntimeException("Failed to read elements from the bounded reader.", e);
    }
    return new Checkpoint<>(newResidualElements, residualSplit);
  }
}
 
Example #24
Source File: UnboundedReadFromBoundedSource.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a residual source wrapper that has no reader yet and is neither closed nor done.
 *
 * @param residualSource the source still to be read; must not be {@code null}
 * @param options pipeline options; must not be {@code null}
 */
public ResidualSource(BoundedSource<T> residualSource, PipelineOptions options) {
  // Initial state: no reader created, nothing closed, nothing finished.
  this.reader = null;
  this.closed = false;
  this.readerDone = false;

  this.residualSource = checkNotNull(residualSource, "residualSource");
  this.options = checkNotNull(options, "options");
}
 
Example #25
Source File: ReadTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes the {@code BoundedSource} stored in the payload's source bytes.
 *
 * @param payload read payload; its bounded-ness must be {@code IsBounded.Enum.BOUNDED}
 * @return the deserialized source
 * @throws IllegalArgumentException if the payload is not marked as bounded
 */
public static BoundedSource<?> boundedSourceFromProto(ReadPayload payload)
    throws InvalidProtocolBufferException {
  checkArgument(payload.getIsBounded().equals(IsBounded.Enum.BOUNDED));
  byte[] serializedSource = payload.getSource().getPayload().toByteArray();
  Object deserialized =
      SerializableUtils.deserializeFromByteArray(serializedSource, "BoundedSource");
  return (BoundedSource<?>) deserialized;
}
 
Example #26
Source File: XmlSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads an XML fixture whose root and record element names use multi-byte characters and
 * expects the three trains Thomas, Henry and James. Currently ignored, for the reason given in
 * the annotation.
 */
@Test
@Ignore(
    "Multi-byte characters in XML are not supported because the parser "
        + "currently does not correctly report byte offsets")
public void testReadXMLWithMultiByteElementName() throws IOException {
  File xmlFile = tempFolder.newFile("trainXMLTiny");
  Files.write(xmlFile.toPath(), xmlWithMultiByteElementName.getBytes(StandardCharsets.UTF_8));

  BoundedSource<Train> trainSource =
      XmlIO.<Train>read()
          .from(xmlFile.toPath().toString())
          .withRootElement("දුම්රියන්")
          .withRecordElement("දුම්රිය")
          .withRecordClass(Train.class)
          .withMinBundleSize(1024)
          .createSource();

  List<Train> expectedTrains =
      ImmutableList.of(
          new Train("Thomas", Train.TRAIN_NUMBER_UNDEFINED, null, null),
          new Train("Henry", Train.TRAIN_NUMBER_UNDEFINED, null, null),
          new Train("James", Train.TRAIN_NUMBER_UNDEFINED, null, null));

  // Compare by string form, ignoring order.
  Object[] actualStrings =
      trainsToStrings(readEverythingFromReader(trainSource.createReader(null))).toArray();
  assertThat(trainsToStrings(expectedTrains), containsInAnyOrder(actualStrings));
}
 
Example #27
Source File: DirectRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates splitting to the underlying source, but only when the desired bundle size is
 * smaller than this source's estimated size.
 *
 * @throws IllegalStateException if the desired bundle size is not smaller than the estimated size
 */
@Override
public List<? extends BoundedSource<T>> split(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  // The requested bundle size has to be below the estimated total size.
  boolean smallerThanEstimate = desiredBundleSizeBytes < getEstimatedSizeBytes(options);
  checkState(smallerThanEstimate, "Must split into more than one source");
  return underlying.split(desiredBundleSizeBytes, options);
}
 
Example #28
Source File: XmlSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the train XML fixture into {@code TinyTrain} records and expects the six trains by name
 * only, without an exception being thrown.
 */
@Test
public void testReadXmlWithAdditionalFieldsShouldNotThrowException() throws IOException {
  File xmlFile = tempFolder.newFile("trainXMLSmall");
  Files.write(xmlFile.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));

  BoundedSource<TinyTrain> tinyTrainSource =
      XmlIO.<TinyTrain>read()
          .from(xmlFile.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(TinyTrain.class)
          .createSource();

  List<TinyTrain> expectedTrains =
      ImmutableList.of(
          new TinyTrain("Thomas"),
          new TinyTrain("Henry"),
          new TinyTrain("Toby"),
          new TinyTrain("Gordon"),
          new TinyTrain("Emily"),
          new TinyTrain("Percy"));

  // Compare by string form, ignoring order.
  Object[] actualStrings =
      tinyTrainsToStrings(readEverythingFromReader(tinyTrainSource.createReader(null))).toArray();
  assertThat(tinyTrainsToStrings(expectedTrains), containsInAnyOrder(actualStrings));
}
 
Example #29
Source File: XmlSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the train XML fixture through an XmlIO source created without a minimum bundle size and
 * expects all six fully populated trains.
 */
@Test
public void testReadXMLNoBundleSize() throws IOException {
  File xmlFile = tempFolder.newFile("trainXMLSmall");
  Files.write(xmlFile.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));

  BoundedSource<Train> trainSource =
      XmlIO.<Train>read()
          .from(xmlFile.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .createSource();

  List<Train> expectedTrains =
      ImmutableList.of(
          new Train("Thomas", 1, "blue", null),
          new Train("Henry", 3, "green", null),
          new Train("Toby", 7, "brown", null),
          new Train("Gordon", 4, "blue", null),
          new Train("Emily", -1, "red", null),
          new Train("Percy", 6, "green", null));

  // Compare by string form, ignoring order.
  Object[] actualStrings =
      trainsToStrings(readEverythingFromReader(trainSource.createReader(null))).toArray();
  assertThat(trainsToStrings(expectedTrains), containsInAnyOrder(actualStrings));
}
 
Example #30
Source File: UnboundedReadFromBoundedSourceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the checkpointing adapter test against a counting source of 100 elements, expecting the
 * values 0 through 99.
 */
@Test
public void testCountingSourceToUnboundedCheckpoint() throws Exception {
  long elementCount = 100;
  BoundedSource<Long> source = CountingSource.upTo(elementCount);

  // Expected output: 0, 1, ..., elementCount - 1.
  List<Long> expectedValues = Lists.newArrayList();
  long next = 0;
  while (next < elementCount) {
    expectedValues.add(next);
    ++next;
  }

  testBoundedToUnboundedSourceAdapterCheckpoint(source, expectedValues);
}