Java Code Examples for org.apache.beam.sdk.values.PCollection#setIsBoundedInternal()

The following examples show how to use org.apache.beam.sdk.values.PCollection#setIsBoundedInternal(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WriteFilesTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes 100 stringified integers through {@code WriteFiles} into a {@code SimpleSink} that is
 * configured with {@code TestDestinations}, runs the pipeline, and then checks the contents of
 * the files {@code file_0} through {@code file_4} under the base output directory.
 *
 * @param bounded when {@code false}, the input is flagged as unbounded, placed into one-day
 *     fixed windows and written with windowed writes; the value is also forwarded to {@code
 *     checkFileContents} as its "expect removed temp directory" argument
 * @param emptyShards when {@code true}, the shard count is {@code 2 * numInputs / 5} rather than
 *     2, which exceeds the number of elements per destination so that empty shards are produced
 * @throws IOException declared because helpers invoked here may raise it (their bodies are not
 *     visible in this block)
 */
private void testDynamicDestinationsHelper(boolean bounded, boolean emptyShards)
    throws IOException {
  final int numInputs = 100;
  // The verification loop reads this many files and strides the records by the same amount.
  // NOTE(review): presumably matches how TestDestinations partitions elements - confirm.
  final int destinationCount = 5;

  TestDestinations destinations = new TestDestinations(getBaseOutputDirectory());
  SimpleSink<Integer> simpleSink =
      new SimpleSink<>(getBaseOutputDirectory(), destinations, Compression.UNCOMPRESSED);

  // Set a flag on the options so it can be validated that pipeline options reach the Sink.
  WriteOptions writeOptions = TestPipeline.testingPipelineOptions().as(WriteOptions.class);
  writeOptions.setTestFlag("test_value");
  Pipeline pipeline = TestPipeline.create(writeOptions);

  // Build each element together with its timestamp: element n is stamped n + 1.
  List<String> elements = new ArrayList<>();
  List<Long> elementTimestamps = new ArrayList<>();
  for (int n = 0; n < numInputs; n++) {
    elements.add(String.valueOf(n));
    elementTimestamps.add(n + 1L);
  }

  // With emptyShards the shard count exceeds the number of elements per destination, which
  // forces every destination to generate some empty shards.
  int numShards;
  if (emptyShards) {
    numShards = 2 * numInputs / destinationCount;
  } else {
    numShards = 2;
  }
  WriteFiles<String, Integer, String> transform =
      WriteFiles.to(simpleSink).withNumShards(numShards);

  PCollection<String> input = pipeline.apply(Create.timestamped(elements, elementTimestamps));
  WriteFilesResult<Integer> result;
  if (bounded) {
    result = input.apply(transform);
  } else {
    // Mark the collection unbounded, then window it before doing a windowed write.
    input.setIsBoundedInternal(IsBounded.UNBOUNDED);
    input = input.apply(Window.into(FixedWindows.of(Duration.standardDays(1))));
    result = input.apply(transform.withWindowedWrites());
  }
  result.getPerDestinationOutputFilenames().apply(new VerifyFilesExist<>());
  pipeline.run();

  // File "file_d" is expected to hold "record_j" for every j in [d, numInputs) stepping by
  // destinationCount.
  for (int dest = 0; dest < destinationCount; dest++) {
    ResourceId outputBase =
        getBaseOutputDirectory().resolve("file_" + dest, StandardResolveOptions.RESOLVE_FILE);
    List<String> expectedRecords = new ArrayList<>();
    for (int rec = dest; rec < numInputs; rec += destinationCount) {
      expectedRecords.add("record_" + rec);
    }
    checkFileContents(
        outputBase.toString(),
        expectedRecords,
        Optional.of(numShards),
        bounded /* expectRemovedTempDirectory */);
  }
}