Java Code Examples for org.apache.beam.sdk.io.fs.ResourceId#toString()

The following examples show how to use org.apache.beam.sdk.io.fs.ResourceId#toString(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: FileUtils.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Copies the bytes of {@code sourceFile} into a newly created {@code destinationFile}.
 *
 * <p>The destination is created with MIME type {@code "text/plain"} and the data is streamed
 * through a 16 KiB direct buffer.
 *
 * @param sourceFile resource to read from
 * @param destinationFile resource to create and write to
 * @return the string form of {@code destinationFile}
 * @throws IOException if creating, opening, reading or writing a channel fails
 */
public static String copyFile(ResourceId sourceFile, ResourceId destinationFile)
    throws IOException {
  // Resources are opened in declaration order (sink, then source) and closed in reverse.
  try (WritableByteChannel sink = FileSystems.create(destinationFile, "text/plain");
      ReadableByteChannel source = FileSystems.open(sourceFile)) {
    final ByteBuffer chunk = ByteBuffer.allocateDirect(16 * 1024);

    // Pump until end-of-stream; compact() keeps any bytes a partial write left behind.
    while (source.read(chunk) != -1) {
      chunk.flip();
      sink.write(chunk);
      chunk.compact();
    }

    // Drain whatever is still buffered once the source is exhausted.
    for (chunk.flip(); chunk.hasRemaining(); ) {
      sink.write(chunk);
    }
  }

  return destinationFile.toString();
}
 
Example 2
Source File: WindowedFilenamePolicy.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Expands the date placeholders found in the output directory path using the end time of the
 * supplied window, so that the output location can differ from window to window.
 *
 * <p>The placeholders {@code YYYY}, {@code MM}, {@code DD}, {@code HH} and {@code mm} are
 * substituted, in that order. A window that is not an {@link IntervalWindow} leaves the
 * directory untouched.
 *
 * @param outputDirectoryStr provider of the (possibly templated) output directory
 * @param window window whose end timestamp drives the substitution
 * @return The new output directory with all variables resolved.
 */
private ResourceId resolveWithDateTemplates(
        ValueProvider<String> outputDirectoryStr, BoundedWindow window) {
    ResourceId templatedDirectory =
            FileSystems.matchNewResource(outputDirectoryStr.get(), true);
    if (!(window instanceof IntervalWindow)) {
        return templatedDirectory;
    }

    DateTime windowEnd = ((IntervalWindow) window).end().toDateTime();
    String resolvedPath =
            templatedDirectory
                    .toString()
                    .replace("YYYY", YEAR.print(windowEnd))
                    .replace("MM", MONTH.print(windowEnd))
                    .replace("DD", DAY.print(windowEnd))
                    .replace("HH", HOUR.print(windowEnd))
                    .replace("mm", MINUTE.print(windowEnd));
    return FileSystems.matchNewResource(resolvedPath, true);
}
 
Example 3
Source File: WindowedFilenamePolicy.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Substitutes the date variables present in the output directory path with values taken from
 * the window end time, allowing the output location to change per window.
 *
 * <p>Handled variables are {@code YYYY}, {@code MM}, {@code DD}, {@code HH} and {@code mm},
 * replaced in that order. For any window other than an {@link IntervalWindow} the directory is
 * returned as matched from the provider value.
 *
 * @param outputDirectoryStr provider of the (possibly templated) output directory
 * @param window window whose end timestamp supplies the date values
 * @return The new output directory with all variables resolved.
 */
private ResourceId resolveWithDateTemplates(
    ValueProvider<String> outputDirectoryStr, BoundedWindow window) {
  ResourceId rawDirectory = FileSystems.matchNewResource(outputDirectoryStr.get(), true);
  if (!(window instanceof IntervalWindow)) {
    return rawDirectory;
  }

  DateTime endOfWindow = ((IntervalWindow) window).end().toDateTime();
  String path = rawDirectory.toString();
  path = path.replace("YYYY", YEAR.print(endOfWindow)).replace("MM", MONTH.print(endOfWindow));
  path = path.replace("DD", DAY.print(endOfWindow)).replace("HH", HOUR.print(endOfWindow));
  path = path.replace("mm", MINUTE.print(endOfWindow));
  return FileSystems.matchNewResource(path, true);
}
 
Example 4
Source File: WindowedFilenamePolicyTest.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that windowedFilename() assembles the expected filename when the policy is
 * configured with plain Strings.
 */
@Test
public void testWindowedFilenameFormatString() throws IOException {
  // Arrange
  ResourceId baseDirectory = getBaseTempDirectory();
  WindowedContext context = mock(WindowedContext.class);
  BoundedWindow window = mock(BoundedWindow.class);
  PaneInfo pane = PaneInfo.createPane(false, true, Timing.ON_TIME, 0, 0);
  String directory = baseDirectory.toString();
  WindowedFilenamePolicy policy =
      new WindowedFilenamePolicy(directory, "string-output", "-SSS-of-NNN", ".csv");

  // Act
  TestOutputFileHints hints = new TestOutputFileHints();
  ResourceId result = policy.windowedFilename(1, 1, window, pane, hints);

  // Assert
  assertThat(result, is(notNullValue()));
  assertThat(result.getFilename(), is(equalTo("string-output-001-of-001.csv")));
}
 
Example 5
Source File: FileUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Streams the content of {@code sourceFile} into {@code destinationFile}.
 *
 * <p>The destination is created as {@code "text/plain"}; data moves through a 16 KiB direct
 * buffer.
 *
 * @param sourceFile resource to read from
 * @param destinationFile resource to create and write to
 * @return {@code destinationFile.toString()}
 * @throws IOException if either channel cannot be opened, read or written
 */
public static String copyFile(ResourceId sourceFile, ResourceId destinationFile)
    throws IOException {
  // Declaration order matters: the writer is created before the reader is opened, and the
  // reader is closed before the writer.
  try (WritableByteChannel out = FileSystems.create(destinationFile, "text/plain");
      ReadableByteChannel in = FileSystems.open(sourceFile)) {
    final ByteBuffer transfer = ByteBuffer.allocateDirect(16 * 1024);

    boolean endOfStream = in.read(transfer) == -1;
    while (!endOfStream) {
      // Switch to draining mode, push what we can, then keep the unwritten remainder.
      transfer.flip();
      out.write(transfer);
      transfer.compact();
      endOfStream = in.read(transfer) == -1;
    }

    // Flush the tail that is still buffered after end-of-stream.
    transfer.flip();
    while (transfer.hasRemaining()) {
      out.write(transfer);
    }
  }

  return destinationFile.toString();
}
 
Example 6
Source File: DefaultFilenamePolicyTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Convenience wrapper around {@code DefaultFilenamePolicy.constructName} that accepts the base
 * filename as a String and returns the constructed name as a String.
 *
 * <p>{@code baseFilename} is matched as a non-directory resource before being handed over; all
 * other arguments are passed through unchanged.
 */
private static String constructName(
    String baseFilename,
    String shardTemplate,
    String suffix,
    int shardNum,
    int numShards,
    String paneStr,
    String windowStr) {
  ResourceId baseResource = FileSystems.matchNewResource(baseFilename, false);
  return DefaultFilenamePolicy.constructName(
          baseResource, shardTemplate, suffix, shardNum, numShards, paneStr, windowStr)
      .toString();
}
 
Example 7
Source File: BoundedSideInputJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code query} over the generated events and asserts its output against {@code model}.
 *
 * <p>A randomly suffixed side input location under the pipeline's temp location is stored in
 * {@code config.sideInputUrl} before the run, and {@code NexmarkUtils.cleanUpSideInput} is
 * always invoked afterwards.
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {

  String sideInputSpec =
      String.format(
          "%s/BoundedSideInputJoin-%s",
          p.getOptions().getTempLocation(), new Random().nextInt());
  config.sideInputUrl = FileSystems.matchNewResource(sideInputSpec, false).toString();

  try {
    query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

    PCollection<Event> events =
        p.apply(
            name + ".Read",
            streamingMode
                ? NexmarkUtils.streamEventsSource(config)
                : NexmarkUtils.batchEventsSource(config));

    PCollection<TimestampedValue<T>> results =
        (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(config, query));
    PAssert.that(results).satisfies(model.assertionFor());

    p.run().waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
 
Example 8
Source File: SessionSideInputJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Executes {@code query} against the event source and checks the results with the assertion
 * supplied by {@code model}.
 *
 * <p>{@code config.sideInputUrl} is pointed at a randomly suffixed path under the pipeline's
 * temp location; the side input is cleaned up in a {@code finally} block.
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {

  String sideInputPath =
      String.format(
          "%s/SessionSideInputJoin-%s",
          p.getOptions().getTempLocation(), new Random().nextInt());
  ResourceId sideInputResource = FileSystems.matchNewResource(sideInputPath, false);
  config.sideInputUrl = sideInputResource.toString();

  try {
    PCollection<KV<Long, String>> preparedSideInput = NexmarkUtils.prepareSideInput(p, config);
    query.setSideInput(preparedSideInput);

    PCollection<Event> sourceEvents =
        p.apply(
            name + ".Read",
            streamingMode
                ? NexmarkUtils.streamEventsSource(config)
                : NexmarkUtils.batchEventsSource(config));

    PCollection<TimestampedValue<T>> queryOutput =
        (PCollection<TimestampedValue<T>>) sourceEvents.apply(new NexmarkQuery<>(config, query));
    PAssert.that(queryOutput).satisfies(model.assertionFor());

    p.run().waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
 
Example 9
Source File: SqlBoundedSideInputJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies {@code query} to the event source and verifies the output using {@code model}'s
 * assertion.
 *
 * <p>Before running, {@code config.sideInputUrl} is set to a randomly suffixed location under
 * the pipeline's temp location. The side input is cleaned up whether or not the run succeeds.
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {

  String location =
      String.format(
          "%s/JoinToFiles-%s", p.getOptions().getTempLocation(), new Random().nextInt());
  config.sideInputUrl = FileSystems.matchNewResource(location, false).toString();

  try {
    query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

    PCollection<Event> input =
        p.apply(
            name + ".Read",
            streamingMode
                ? NexmarkUtils.streamEventsSource(config)
                : NexmarkUtils.batchEventsSource(config));

    PCollection<TimestampedValue<T>> output =
        (PCollection<TimestampedValue<T>>) input.apply(new NexmarkQuery<>(config, query));
    PAssert.that(output).satisfies(model.assertionFor());

    PipelineResult run = p.run();
    run.waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
 
Example 10
Source File: NexmarkUtilsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a CSV side input produced by {@code NexmarkUtils.prepareSideInput} contains one
 * {@code (id, id.toString())} pair for every id in {@code [0, sideInputRowCount)}.
 *
 * <p>The side input location is a randomly suffixed path under the pipeline's temp location and
 * is cleaned up when the test finishes, whether it passes or fails.
 */
@Test
public void testPrepareCsvSideInput() throws Exception {
  NexmarkConfiguration config = NexmarkConfiguration.DEFAULT.copy();
  config.sideInputType = NexmarkUtils.SideInputType.CSV;
  ResourceId sideInputResourceId =
      FileSystems.matchNewResource(
          String.format(
              "%s/JoinToFiles-%s",
              pipeline.getOptions().getTempLocation(), new Random().nextInt()),
          false);
  config.sideInputUrl = sideInputResourceId.toString();
  config.sideInputRowCount = 10000;
  config.sideInputNumShards = 15;

  try {
    // Prepare inside the try so cleanUpSideInput still runs when preparation itself throws;
    // presumably preparation may already have written files under sideInputUrl by then.
    PCollection<KV<Long, String>> sideInput = NexmarkUtils.prepareSideInput(pipeline, config);
    PAssert.that(sideInput)
        .containsInAnyOrder(
            LongStream.range(0, config.sideInputRowCount)
                .boxed()
                .map(l -> KV.of(l, l.toString()))
                .collect(Collectors.toList()));
    pipeline.run();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
 
Example 11
Source File: FileBasedSinkTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates {@code numFiles} empty files under the temporary folder, named with the temporary
 * file pattern of Writer, and returns them in creation order.
 */
private List<File> generateTemporaryFilesForFinalize(int numFiles) throws Exception {
  List<File> created = new ArrayList<>();
  int index = 0;
  while (index < numFiles) {
    ResourceId tempResource =
        WriteOperation.buildTemporaryFilename(getBaseTempDirectory(), String.valueOf(index));
    File onDisk = new File(tmpFolder.getRoot(), tempResource.toString());
    // Make sure the parent directories exist before creating the file itself.
    onDisk.getParentFile().mkdirs();
    assertTrue(onDisk.createNewFile());
    created.add(onDisk);
    index++;
  }

  return created;
}
 
Example 12
Source File: TextIOWriteTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Asserts that the output files for {@code outputPrefix} hold exactly {@code elems} (in any
 * order) once header and footer are removed, and that every file passes the header/footer
 * check.
 *
 * <p>When {@code numShards} is zero the files are discovered by matching {@code outputPrefix}
 * followed by {@code *}; otherwise one file name per shard is built with
 * {@code DefaultFilenamePolicy.constructName} using {@code shardNameTemplate} and an empty
 * suffix.
 */
private static void assertOutputFiles(
    String[] elems,
    final String header,
    final String footer,
    int numShards,
    ResourceId outputPrefix,
    String shardNameTemplate)
    throws Exception {
  // Step 1: work out which files should exist.
  List<File> outputFiles = new ArrayList<>();
  if (numShards != 0) {
    for (int shard = 0; shard < numShards; shard++) {
      ResourceId shardResource =
          DefaultFilenamePolicy.constructName(
              outputPrefix, shardNameTemplate, "", shard, numShards, null, null);
      outputFiles.add(new File(shardResource.toString()));
    }
  } else {
    List<MatchResult> matches =
        FileSystems.match(Collections.singletonList(outputPrefix.toString() + "*"));
    for (Metadata matched : Iterables.getOnlyElement(matches).metadata()) {
      outputFiles.add(new File(matched.resourceId().toString()));
    }
  }

  // Step 2: read every file, keeping the lines grouped per file.
  List<List<String>> linesPerFile = new ArrayList<>();
  for (File outputFile : outputFiles) {
    linesPerFile.add(readLinesFromFile(outputFile));
  }

  // Step 3: build the expected lines by round-tripping each element through the UTF-8 coder.
  List<String> expectedLines = new ArrayList<>(elems.length);
  for (String elem : elems) {
    expectedLines.add(
        new String(
            CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem), Charsets.UTF_8));
  }

  // Step 4: strip header/footer from each file and flatten into a single list.
  List<List<String>> strippedPerFile =
      FluentIterable.from(linesPerFile).transform(removeHeaderAndFooter(header, footer)).toList();
  List<String> actualLines = Lists.newArrayList(Iterables.concat(strippedPerFile));

  assertThat(actualLines, containsInAnyOrder(expectedLines.toArray()));
  assertTrue(linesPerFile.stream().allMatch(haveProperHeaderAndFooter(header, footer)::apply));
}