Java Code Examples for org.apache.beam.sdk.io.FileBasedSink#convertToFileResourceIfPossible()

The following examples show how to use org.apache.beam.sdk.io.FileBasedSink#convertToFileResourceIfPossible(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DynamicOneFilePerWindow.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the value of every key/value pair and writes the values as windowed text files.
 *
 * <p>The output resource is derived from {@code filenamePrefix}; file names come from a
 * {@code PerWindowFiles} policy and temporary files go in the resource's current directory.
 * A shard count is applied only when {@code numShards} is set.
 *
 * @param input key/value pairs; the key is only logged, the value is written out
 * @return the result of applying the text write
 */
@Override
public PDone expand(PCollection<KV<String, String>> input) {

  PCollection<String> contents =
      input.apply(
          ParDo.of(
              new DoFn<KV<String, String>, String>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  // Build the per-element prefix in a local. Reassigning the enclosing
                  // filenamePrefix field here made it grow by one key for every element
                  // processed, and the updated value was never read by the write below.
                  String elementPrefix =
                      String.format("%s%s", filenamePrefix, c.element().getKey());
                  LOG.info("File Prefix {}", elementPrefix);

                  c.output(c.element().getValue());
                }
              }));

  ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  TextIO.Write write =
      TextIO.write()
          .to(new PerWindowFiles(resource))
          .withTempDirectory(resource.getCurrentDirectory())
          .withWindowedWrites();

  // Only override the shard count when one was configured.
  if (numShards != null) {
    write = write.withNumShards(numShards);
  }

  return contents.apply(write);
}
 
Example 2
Source File: WriteOneFilePerWindow.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the input strings as windowed text files.
 *
 * <p>File names come from a {@code PerWindowFiles} policy built on the resource resolved from
 * {@code filenamePrefix}; temporary files go in that resource's current directory.
 *
 * @param input the lines to write
 * @return the result of applying the text write
 */
@Override
public PDone expand(PCollection<String> input) {
  ResourceId outputResource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);

  TextIO.Write windowedWrite =
      TextIO.write()
          .to(new PerWindowFiles(outputResource))
          .withTempDirectory(outputResource.getCurrentDirectory())
          .withWindowedWrites();

  // Apply the shard count only when one was configured; otherwise use the write as built.
  TextIO.Write finalWrite =
      (numShards == null) ? windowedWrite : windowedWrite.withNumShards(numShards);

  return input.apply(finalWrite);
}
 
Example 3
Source File: WriteOneFilePerWindow.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a windowed text write to the input.
 *
 * <p>The target resource is resolved from {@code filenamePrefix}. It backs both the
 * {@code PerWindowFiles} naming policy and, through its current directory, the temp location.
 *
 * @param input the lines to write
 * @return the result of applying the text write
 */
@Override
public PDone expand(final PCollection<String> input) {
  final ResourceId target = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  final TextIO.Write unsharded =
    TextIO.write()
      .to(new PerWindowFiles(target))
      .withTempDirectory(target.getCurrentDirectory())
      .withWindowedWrites();

  // Without a configured shard count the write is applied unchanged.
  if (numShards == null) {
    return input.apply(unsharded);
  }
  return input.apply(unsharded.withNumShards(numShards));
}
 
Example 4
Source File: WriteToText.java    From deployment-examples with MIT License 5 votes vote down vote up
/**
 * Writes the input strings as windowed text files in three shards.
 *
 * <p>The input's window coder must be the very instance returned by
 * {@code IntervalWindow.getCoder()} (reference comparison); otherwise the argument check fails.
 *
 * @param input the lines to write
 * @return the result of applying the text write
 */
@Override
public PDone expand(PCollection<String> input) {
  // Verify that the input has a compatible window type.
  boolean hasIntervalWindowCoder =
      input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder();
  checkArgument(hasIntervalWindowCoder);

  ResourceId outputResource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);

  TextIO.Write shardedWrite =
      TextIO.write()
          .to(new PerWindowFiles(outputResource))
          .withTempDirectory(outputResource.getCurrentDirectory())
          .withWindowedWrites()
          .withNumShards(3);

  return input.apply(shardedWrite);
}
 
Example 5
Source File: WriteOneFilePerWindow.java    From deployment-examples with MIT License 5 votes vote down vote up
/**
 * Writes each window of the input to its own text file(s).
 *
 * <p>Names are produced by a {@code PerWindowFiles} policy over the resource resolved from
 * {@code filenamePrefix}; the resource's current directory is used for temporary files.
 *
 * @param input the lines to write
 * @return the result of applying the text write
 */
@Override
public PDone expand(PCollection<String> input) {
  ResourceId prefixResource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);

  TextIO.Write perWindowWrite =
      TextIO.write()
          .to(new PerWindowFiles(prefixResource))
          .withTempDirectory(prefixResource.getCurrentDirectory())
          .withWindowedWrites();

  // A shard count is optional; only set it when the transform was given one.
  return input.apply(
      numShards == null ? perWindowWrite : perWindowWrite.withNumShards(numShards));
}
 
Example 6
Source File: WriteToText.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a three-shard, windowed text write to the input.
 *
 * <p>Fails the argument check unless the input's window coder is the same object as
 * {@code IntervalWindow.getCoder()}.
 *
 * @param input the lines to write
 * @return the result of applying the text write
 */
@Override
public PDone expand(PCollection<String> input) {
  // Verify that the input has a compatible window type.
  boolean windowCoderMatches =
      input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder();
  checkArgument(windowCoderMatches);

  ResourceId target = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  PerWindowFiles namingPolicy = new PerWindowFiles(target);

  return input.apply(
      TextIO.write()
          .to(namingPolicy)
          .withTempDirectory(target.getCurrentDirectory())
          .withWindowedWrites()
          .withNumShards(3));
}
 
Example 7
Source File: WriteOneFilePerWindow.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and applies a windowed text write for the input.
 *
 * <p>The resource resolved from {@code filenamePrefix} feeds the {@code PerWindowFiles} naming
 * policy, and its current directory is used as the temp directory.
 *
 * @param input the lines to write
 * @return the result of applying the text write
 */
@Override
public PDone expand(PCollection<String> input) {
  ResourceId destination = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
  PerWindowFiles namingPolicy = new PerWindowFiles(destination);

  TextIO.Write configured =
      TextIO.write()
          .to(namingPolicy)
          .withTempDirectory(destination.getCurrentDirectory())
          .withWindowedWrites();

  // Leave sharding untouched unless a shard count was supplied.
  if (numShards == null) {
    return input.apply(configured);
  }
  return input.apply(configured.withNumShards(numShards));
}
 
Example 8
Source File: WindowedWordCountIT.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the windowed word count with the given options and checks the written files.
 *
 * <p>The expected output is six sharded file patterns, one per ten-minute window starting at
 * minutes 0 through 50 after {@code options.getMinTimestampMillis()}. The expected counts are
 * computed by tokenizing the input file directly.
 *
 * @param options pipeline options supplying the input file, output location and base timestamp
 * @throws Exception if reading the input or running the pipeline fails
 */
private void testWindowedWordCountPipeline(WindowedWordCountITOptions options) throws Exception {

    ResourceId outputResource = FileBasedSink.convertToFileResourceIfPossible(options.getOutput());
    PerWindowFiles windowPolicy = new PerWindowFiles(outputResource);

    List<ShardedFile> expectedOutputFiles = Lists.newArrayListWithCapacity(6);

    for (int minuteOffset : ImmutableList.of(0, 10, 20, 30, 40, 50)) {
      final Instant windowStart =
          new Instant(options.getMinTimestampMillis())
              .plus(Duration.standardMinutes(minuteOffset));
      IntervalWindow window =
          new IntervalWindow(windowStart, windowStart.plus(Duration.standardMinutes(10)));
      String windowPrefix = windowPolicy.filenamePrefixForWindow(window);

      // Match every shard written for this window's prefix.
      String shardPattern =
          outputResource
                  .getCurrentDirectory()
                  .resolve(windowPrefix, StandardResolveOptions.RESOLVE_FILE)
                  .toString()
              + "*";
      expectedOutputFiles.add(new NumberedShardedFile(shardPattern));
    }

    ShardedFile inputFile = new ExplicitShardedFile(Collections.singleton(options.getInputFile()));

    // For this integration test, input is tiny and we can build the expected counts
    SortedMap<String, Long> expectedWordCounts = new TreeMap<>();
    for (String line :
        inputFile.readFilesWithRetries(Sleeper.DEFAULT, BACK_OFF_FACTORY.backoff())) {
      for (String token : line.split(ExampleUtils.TOKENIZER_PATTERN, -1)) {
        if (token.isEmpty()) {
          continue;
        }
        // Start at 1 for a new word, otherwise add 1 to the existing count.
        expectedWordCounts.merge(token, 1L, Long::sum);
      }
    }

    WindowedWordCount.runWindowedWordCount(options);

    assertThat(expectedOutputFiles, containsWordCounts(expectedWordCounts));
  }