Java Code Examples for org.apache.beam.sdk.io.FileSystems#matchNewResource()

The following examples show how to use org.apache.beam.sdk.io.FileSystems#matchNewResource(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: IsmSinkTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adds a record whose second key component is {@code {0x00, 0x00}} and then one whose second key
 * component is {@code {0x00}}, expecting the second add to fail with an {@link
 * IllegalArgumentException} about strictly increasing key order.
 */
@Test
public void testWriteKeyWhichIsProperPrefixOfPreviousSecondaryKeyIsError() throws Throwable {
  IsmSink<byte[]> ismSink =
      new IsmSink<>(
          FileSystems.matchNewResource(tmpFolder.newFile().getPath(), false),
          CODER,
          BLOOM_FILTER_SIZE_LIMIT);
  SinkWriter<WindowedValue<IsmRecord<byte[]>>> writer = ismSink.writer();

  // The first record carries the longer secondary key.
  IsmRecord<byte[]> longerKeyRecord =
      IsmRecord.of(ImmutableList.of(EMPTY, new byte[] {0x00, 0x00}), EMPTY);
  writer.add(new ValueInEmptyWindows<>(longerKeyRecord));

  // Register the expected failure before attempting the out-of-order write.
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("expects keys to be written in strictly increasing order");

  // The second record's secondary key is a proper prefix of the previous one.
  IsmRecord<byte[]> prefixKeyRecord =
      IsmRecord.of(ImmutableList.of(EMPTY, new byte[] {0x00}), EMPTY);
  writer.add(new ValueInEmptyWindows<>(prefixKeyRecord));
}
 
Example 2
Source File: ResourceIdTester.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts, for every given {@link ResourceId}, that it equals itself and that a copy obtained
 * through {@code FileSystems#matchNewResource} is equal to it and agrees with it on {@code
 * toString()} and {@code isDirectory()}.
 *
 * @param resourceIds the resource ids to validate
 */
private static void validateResourceIds(List<ResourceId> resourceIds) {
  for (ResourceId original : resourceIds) {
    // Reflexivity of equals.
    assertThat("ResourceId equal to itself", original, equalTo(original));

    // Round-trip through FileSystems#matchNewResource, carrying over the directory flag.
    boolean directory = original.isDirectory();
    ResourceId copy = FileSystems.matchNewResource(original.toString(), directory);
    assertThat("ResourceId equals clone of itself", copy, equalTo(original));

    // The copy must render to the same string ...
    String copyString = copy.toString();
    assertThat("ResourceId toString consistency", copyString, equalTo(original.toString()));

    // ... and must report the same directory flag.
    boolean copyIsDirectory = copy.isDirectory();
    assertThat(
        "ResourceId isDirectory consistency", copyIsDirectory, equalTo(original.isDirectory()));
  }
}
 
Example 3
Source File: WindowedFilenamePolicy.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Substitutes the date placeholders {@code YYYY}, {@code MM}, {@code DD}, {@code HH} and
 * {@code mm} in the output directory path with values taken from the end of the window.
 * Substitution only happens for {@link IntervalWindow}s; for any other window the directory is
 * returned as matched from the provided string.
 *
 * @param outputDirectoryStr provider of the output directory path, possibly with placeholders.
 * @param window the window whose end time feeds the placeholders.
 * @return The new output directory with all variables resolved.
 */
private ResourceId resolveWithDateTemplates(
        ValueProvider<String> outputDirectoryStr, BoundedWindow window) {
    ResourceId unresolved = FileSystems.matchNewResource(outputDirectoryStr.get(), true);
    if (!(window instanceof IntervalWindow)) {
        // No end time to draw values from; leave the path untouched.
        return unresolved;
    }

    DateTime windowEnd = ((IntervalWindow) window).end().toDateTime();
    String resolvedPath =
        unresolved
            .toString()
            .replace("YYYY", YEAR.print(windowEnd))
            .replace("MM", MONTH.print(windowEnd))
            .replace("DD", DAY.print(windowEnd))
            .replace("HH", HOUR.print(windowEnd))
            .replace("mm", MINUTE.print(windowEnd));
    return FileSystems.matchNewResource(resolvedPath, true);
}
 
Example 4
Source File: IsmSinkTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adds records whose first key component is {@code {0x00}}, then {@code {0x01}}, then {@code
 * {0x00}} again, expecting the third add to fail with an {@link IllegalStateException} about a
 * shard that already exists.
 */
@Test
public void testWriteNonContiguousShardsIsError() throws Throwable {
  IsmSink<byte[]> ismSink =
      new IsmSink<>(
          FileSystems.matchNewResource(tmpFolder.newFile().getPath(), false),
          CODER,
          BLOOM_FILTER_SIZE_LIMIT);
  SinkWriter<WindowedValue<IsmRecord<byte[]>>> writer = ismSink.writer();

  // Two records with distinct leading key components are accepted.
  IsmRecord<byte[]> firstRecord =
      IsmRecord.of(ImmutableList.of(new byte[] {0x00}, EMPTY), EMPTY);
  writer.add(new ValueInEmptyWindows<>(firstRecord));
  IsmRecord<byte[]> secondRecord =
      IsmRecord.of(ImmutableList.of(new byte[] {0x01}, EMPTY), EMPTY);
  writer.add(new ValueInEmptyWindows<>(secondRecord));

  // Register the expected failure before returning to the first leading key component.
  expectedException.expect(IllegalStateException.class);
  expectedException.expectMessage("for shard which already exists");
  IsmRecord<byte[]> repeatedRecord =
      IsmRecord.of(ImmutableList.of(new byte[] {0x00}, EMPTY), EMPTY);
  writer.add(new ValueInEmptyWindows<>(repeatedRecord));
}
 
Example 5
Source File: XmlIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the configuration and writes the input collection as XML files through {@code
 * FileIO.write()}.
 *
 * <p>The configured filename prefix is split into its directory, used as the write destination,
 * and its filename, used as the per-file prefix. Output files get the {@code .xml} suffix.
 *
 * @param input the collection of records to write
 * @return a {@link PDone} in the input's pipeline
 * @throws IllegalArgumentException if a required setting is missing
 * @throws RuntimeException if the record class cannot be bound to a JAXB context
 */
@Override
public PDone expand(PCollection<T> input) {
  checkArgument(getRecordClass() != null, "withRecordClass() is required");
  checkArgument(getRootElement() != null, "withRootElement() is required");
  checkArgument(getFilenamePrefix() != null, "to() is required");
  checkArgument(getCharset() != null, "withCharset() is required");

  // Create a JAXB context for the record class up front so binding problems fail here.
  try {
    JAXBContext.newInstance(getRecordClass());
  } catch (JAXBException e) {
    throw new RuntimeException("Error binding classes to a JAXB Context.", e);
  }

  ResourceId prefix =
      FileSystems.matchNewResource(getFilenamePrefix(), false /* isDirectory */);
  String outputDirectory = prefix.getCurrentDirectory().toString();
  String filenamePrefix = prefix.getFilename();

  input.apply(
      FileIO.<T>write()
          .via(
              sink(getRecordClass())
                  .withCharset(Charset.forName(getCharset()))
                  .withRootElement(getRootElement()))
          .to(outputDirectory)
          .withPrefix(filenamePrefix)
          .withSuffix(".xml")
          .withIgnoreWindowing());
  return PDone.in(input.getPipeline());
}
 
Example 6
Source File: IsmSinkTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the same key ({@code EMPTY}, {@code {0x01}}) twice, expecting the second add to fail with
 * an {@link IllegalArgumentException} about strictly increasing key order.
 */
@Test
public void testWriteEqualKeysIsError() throws Throwable {
  IsmSink<byte[]> ismSink =
      new IsmSink<>(
          FileSystems.matchNewResource(tmpFolder.newFile().getPath(), false),
          CODER,
          BLOOM_FILTER_SIZE_LIMIT);
  SinkWriter<WindowedValue<IsmRecord<byte[]>>> writer = ismSink.writer();

  // The first write of the key is accepted.
  IsmRecord<byte[]> firstRecord =
      IsmRecord.of(ImmutableList.of(EMPTY, new byte[] {0x01}), EMPTY);
  writer.add(new ValueInEmptyWindows<>(firstRecord));

  // Register the expected failure before writing an equal key again.
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("expects keys to be written in strictly increasing order");
  IsmRecord<byte[]> duplicateRecord =
      IsmRecord.of(ImmutableList.of(EMPTY, new byte[] {0x01}), EMPTY);
  writer.add(new ValueInEmptyWindows<>(duplicateRecord));
}
 
Example 7
Source File: WindowedFilenamePolicy.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Replaces date placeholders in the output directory path with values derived from the end time
 * of the window, so the output location can vary per window. The placeholders handled are {@code
 * YYYY}, {@code MM}, {@code DD}, {@code HH} and {@code mm}, applied in that order. Windows that
 * are not {@link IntervalWindow}s leave the path unchanged.
 *
 * @param outputDirectoryStr provider of the output directory path, possibly with placeholders.
 * @param window the window whose end time feeds the placeholders.
 * @return The new output directory with all variables resolved.
 */
private ResourceId resolveWithDateTemplates(
    ValueProvider<String> outputDirectoryStr, BoundedWindow window) {
  ResourceId resolvedDirectory = FileSystems.matchNewResource(outputDirectoryStr.get(), true);

  if (window instanceof IntervalWindow) {
    DateTime windowEnd = ((IntervalWindow) window).end().toDateTime();
    String template = resolvedDirectory.toString();

    // Apply each substitution to the result of the previous one.
    String withYear = template.replace("YYYY", YEAR.print(windowEnd));
    String withMonth = withYear.replace("MM", MONTH.print(windowEnd));
    String withDay = withMonth.replace("DD", DAY.print(windowEnd));
    String withHour = withDay.replace("HH", HOUR.print(windowEnd));
    String withMinute = withHour.replace("mm", MINUTE.print(windowEnd));

    resolvedDirectory = FileSystems.matchNewResource(withMinute, true);
  }
  return resolvedDirectory;
}
 
Example 8
Source File: IsmReaderTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the given records, in iteration order, to {@code tmpFile} through an {@link IsmSink}
 * whose block size is overridden to {@code TEST_BLOCK_SIZE}.
 *
 * @param elements the records to write
 * @param tmpFile the destination file
 * @throws Exception if creating the writer, adding a record, or closing the writer fails
 */
static void writeElementsToFile(Iterable<IsmRecord<byte[]>> elements, File tmpFile)
    throws Exception {
  // Anonymous subclass so the test controls the block size used by the sink.
  IsmSink<byte[]> testSink =
      new IsmSink<byte[]>(
          FileSystems.matchNewResource(tmpFile.getPath(), false),
          CODER,
          BLOOM_FILTER_SIZE_LIMIT) {
        @Override
        long getBlockSize() {
          return TEST_BLOCK_SIZE;
        }
      };

  // The writer is closed when the try block exits.
  try (SinkWriter<WindowedValue<IsmRecord<byte[]>>> sinkWriter = testSink.writer()) {
    for (IsmRecord<byte[]> record : elements) {
      sinkWriter.add(new ValueInEmptyWindows<>(record));
    }
  }
}
 
Example 9
Source File: BeamHelper.java    From dbeam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes all remaining bytes of {@code contents} to the resource named by {@code filename},
 * creating it with the {@code MimeTypes.TEXT} MIME type.
 *
 * @param filename the path of the file to create; resolved as a file, not a directory
 * @param contents the buffer to write; its position is advanced to its limit on success
 * @throws IOException if the resource cannot be created or written
 */
public static void writeToFile(final String filename, final ByteBuffer contents)
    throws IOException {
  final ResourceId resourceId = FileSystems.matchNewResource(filename, false);
  try (WritableByteChannel out = FileSystems.create(resourceId, MimeTypes.TEXT)) {
    // A single write() call is allowed to consume only part of the buffer, so keep writing
    // until nothing remains; otherwise the file could be silently truncated.
    while (contents.hasRemaining()) {
      out.write(contents);
    }
  }
}
 
Example 10
Source File: SqlBoundedSideInputJoinTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code query} over the events source selected by {@code streamingMode} and asserts that
 * its results satisfy {@code model}.
 *
 * <p>A side-input location with a random suffix is derived from the pipeline's temp location and
 * stored in {@code config.sideInputUrl}; the side input is cleaned up once the run finishes or
 * fails.
 *
 * @param name prefix for the name of the read step
 * @param config the configuration; its {@code sideInputUrl} field is overwritten
 * @param query the query under test
 * @param model the model providing the assertion on the results
 * @param streamingMode whether to read from the streaming or the batch events source
 */
private <T extends KnownSize> void queryMatchesModel(
    String name,
    NexmarkConfiguration config,
    NexmarkQueryTransform<T> query,
    NexmarkQueryModel<T> model,
    boolean streamingMode)
    throws Exception {

  // Random suffix so each invocation gets its own side-input path.
  String sideInputPath =
      String.format(
          "%s/JoinToFiles-%s", p.getOptions().getTempLocation(), new Random().nextInt());
  ResourceId sideInputLocation = FileSystems.matchNewResource(sideInputPath, false);
  config.sideInputUrl = sideInputLocation.toString();

  try {
    query.setSideInput(NexmarkUtils.prepareSideInput(p, config));

    String readStepName = name + ".Read";
    PCollection<Event> events =
        p.apply(
            readStepName,
            streamingMode
                ? NexmarkUtils.streamEventsSource(config)
                : NexmarkUtils.batchEventsSource(config));

    PCollection<TimestampedValue<T>> queryOutput =
        (PCollection<TimestampedValue<T>>) events.apply(new NexmarkQuery<>(config, query));
    PAssert.that(queryOutput).satisfies(model.assertionFor());

    PipelineResult pipelineResult = p.run();
    pipelineResult.waitUntilFinish();
  } finally {
    NexmarkUtils.cleanUpSideInput(config);
  }
}
 
Example 11
Source File: BigQueryIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the list of files an extract job wrote into {@code extractDestinationDir}.
 *
 * <p>The job statistics must report exactly one destination URI file count. One path is
 * produced per file, named with the zero-padded 12-digit file index followed by {@code .avro}.
 *
 * @param extractDestinationDir the directory the extract job wrote to
 * @param extractJob the extract job whose statistics carry the file count
 * @return the file paths, in index order
 * @throws RuntimeException if zero or more than one destination URI file count is reported
 */
static List<ResourceId> getExtractFilePaths(String extractDestinationDir, Job extractJob)
    throws IOException {
  List<Long> fileCounts =
      extractJob.getStatistics().getExtract().getDestinationUriFileCounts();

  // Exactly one count is expected; reject both the empty and the multi-count case.
  if (fileCounts.isEmpty()) {
    throw new RuntimeException("No destination uri file count received.");
  }
  if (fileCounts.size() > 1) {
    throw new RuntimeException(
        String.format(
            "More than one destination uri file count received. First two are %s, %s",
            fileCounts.get(0), fileCounts.get(1)));
  }
  long totalFiles = fileCounts.get(0);

  ImmutableList.Builder<ResourceId> extractedFiles = ImmutableList.builder();
  ResourceId destinationDir =
      FileSystems.matchNewResource(extractDestinationDir, true /* isDirectory */);
  for (long fileIndex = 0; fileIndex < totalFiles; ++fileIndex) {
    String fileName = String.format("%012d%s", fileIndex, ".avro");
    extractedFiles.add(
        destinationDir.resolve(fileName, ResolveOptions.StandardResolveOptions.RESOLVE_FILE));
  }
  return extractedFiles.build();
}
 
Example 12
Source File: KeyStoreIntegrationTest.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Writes {@code data} to {@code path} using the FileSystems API, creating the resource with the
 * {@code MimeTypes.BINARY} MIME type. See https://stackoverflow.com/a/50050583.
 *
 * @param path the destination path; resolved as a file, not a directory
 * @param data the bytes to write
 */
private void writeToStorage(String path, byte[] data) throws Exception {
  ResourceId destination = FileSystems.matchNewResource(path, false);
  // Both channels are closed on exit; closing the source channel closes the wrapped stream.
  try (ReadableByteChannel source = Channels.newChannel(new ByteArrayInputStream(data));
      WritableByteChannel sink = FileSystems.create(destination, MimeTypes.BINARY)) {
    ByteStreams.copy(source, sink);
  }
}
 
Example 13
Source File: IsmSinkTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Constructs an {@link IsmSink} with a record coder whose first key component coder is {@code
 * NON_DETERMINISTIC_CODER}, expecting an {@link IllegalArgumentException} stating that the coder
 * is expected to be deterministic.
 */
@Test
public void testUsingNonDeterministicShardKeyCoder() throws Exception {
  expectedException.expect(IllegalArgumentException.class);
  expectedException.expectMessage("is expected to be deterministic");

  // Key component coders: the non-deterministic one first, then a byte array coder.
  ImmutableList<Coder<?>> keyComponentCoders =
      ImmutableList.<Coder<?>>of(NON_DETERMINISTIC_CODER, ByteArrayCoder.of());

  new IsmSink<>(
      FileSystems.matchNewResource(tmpFolder.newFile().getPath(), false),
      IsmRecordCoder.of(1, 0, keyComponentCoders, ByteArrayCoder.of()),
      BLOOM_FILTER_SIZE_LIMIT);
}
 
Example 14
Source File: HadoopResourceIdTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@link ResourceIdTester} battery against a directory resource built from the {@code
 * hdfs://} scheme and the path of {@code hdfsClusterBaseUri}.
 */
@Test
public void testResourceIdTester() {
  String baseDirectorySpec = "hdfs://" + hdfsClusterBaseUri.getPath();
  ResourceId hdfsBaseDirectory =
      FileSystems.matchNewResource(baseDirectorySpec, true /* isDirectory */);
  ResourceIdTester.runResourceIdBattery(hdfsBaseDirectory);
}
 
Example 15
Source File: BulkDecompressor.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Decompresses the inputFile using the compression detected from its name and outputs to the
 * main output of the {@link Decompress} doFn. Files output to the destination are first written
 * as temp files named {@code <compressed-extension>-temp-<output-filename>} within the output
 * directory and then renamed to the final output filename.
 *
 * @param inputFile The inputFile to decompress.
 * @return A {@link ResourceId} which points to the resulting file from the decompression.
 * @throws IOException If reading, decompressing, writing or renaming fails. The message is the
 *     sanitized decompression error message and the original exception is attached as the
 *     cause.
 */
private ResourceId decompress(ResourceId inputFile) throws IOException {
  String inputPath = inputFile.toString();

  // Remove the compressed extension from the file. Example: demo.txt.gz -> demo.txt
  String outputFilename = Files.getNameWithoutExtension(inputPath);

  // Resolve the necessary resources to perform the transfer.
  ResourceId outputDir = FileSystems.matchNewResource(destinationLocation.get(), true);
  ResourceId outputFile =
      outputDir.resolve(outputFilename, StandardResolveOptions.RESOLVE_FILE);
  ResourceId tempFile =
      outputDir.resolve(
          Files.getFileExtension(inputPath) + "-temp-" + outputFilename,
          StandardResolveOptions.RESOLVE_FILE);

  // Resolve the compression
  Compression compression = Compression.detect(inputPath);

  // Perform the copy of the decompressed channel into the destination.
  try (ReadableByteChannel readerChannel =
      compression.readDecompressed(FileSystems.open(inputFile))) {
    // Close the writer before renaming so the temp file is complete when it is moved.
    try (WritableByteChannel writerChannel = FileSystems.create(tempFile, MimeTypes.TEXT)) {
      ByteStreams.copy(readerChannel, writerChannel);
    }

    // Rename the temp file to the output file.
    FileSystems.rename(
        ImmutableList.of(tempFile),
        ImmutableList.of(outputFile),
        MoveOptions.StandardMoveOptions.IGNORE_MISSING_FILES);
  } catch (IOException e) {
    String msg = e.getMessage();

    LOG.error("Error occurred during decompression of {}", inputPath, e);
    // Keep the sanitized message for callers, and attach the original exception as the cause
    // so its stack trace is not lost.
    throw new IOException(sanitizeDecompressionErrorMsg(msg, inputFile, compression), e);
  }

  return outputFile;
}
 
Example 16
Source File: StreamingDataGeneratorTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Helper to generate files for testing. The contents are encoded as UTF-8 so the bytes written
 * do not depend on the platform's default charset.
 *
 * @param filePath The path to the file to write.
 * @param fileContents The content to write.
 * @return The file written.
 * @throws IOException If an error occurs while creating or writing the file.
 */
private static ResourceId writeToFile(String filePath, String fileContents) throws IOException {

  ResourceId resourceId = FileSystems.matchNewResource(filePath, false);

  // Fully-qualified charset reference avoids depending on an import outside this method.
  byte[] encodedContents = fileContents.getBytes(java.nio.charset.StandardCharsets.UTF_8);

  // Write the file contents to the channel and close.
  try (ReadableByteChannel readChannel =
      Channels.newChannel(new ByteArrayInputStream(encodedContents))) {
    try (WritableByteChannel writeChannel = FileSystems.create(resourceId, MimeTypes.TEXT)) {
      ByteStreams.copy(readChannel, writeChannel);
    }
  }

  return resourceId;
}
 
Example 17
Source File: FhirIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the output for a bundle: picks a file name with a random UUID under the temp GCS
 * path, opens a channel to that file, and creates the object mapper if it has not been created
 * yet.
 *
 * @throws IOException if the file cannot be created
 */
@StartBundle
public void initFile() throws IOException {
  // Each bundle goes to its own newline-delimited JSON file.
  String batchFileName =
      String.format("fhirImportBatch-%s.ndjson", UUID.randomUUID().toString());
  this.resourceId =
      FileSystems.matchNewResource(this.tempGcsPath.get(), true)
          .resolve(batchFileName, StandardResolveOptions.RESOLVE_FILE);
  this.ndJsonChannel = FileSystems.create(resourceId, "application/ld+json");

  // Create the mapper only if one is not already set.
  if (mapper == null) {
    this.mapper = new ObjectMapper();
  }
}
 
Example 18
Source File: IsmReaderFactory.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns an Ism reader for the file named in {@code spec}, obtained through the execution
 * context's logical reference cache.
 *
 * <p>The coder must be a {@link WindowedValueCoder} whose value coder is an {@link
 * IsmRecordCoder}, and the execution context must be a {@link BatchModeExecutionContext};
 * otherwise an {@link IllegalArgumentException} is thrown.
 *
 * @param spec the cloud object holding the filename property
 * @param coder the coder for the windowed Ism records
 * @param options pipeline options (not used by this method)
 * @param executionContext the execution context supplying the caches
 * @param operationContext the operation context (not used by this method)
 * @return the reader for the file
 */
<V> NativeReader<?> createImpl(
    CloudObject spec,
    Coder<?> coder,
    PipelineOptions options,
    DataflowExecutionContext executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  final ResourceId ismFile =
      FileSystems.matchNewResource(
          getString(spec, WorkerPropertyNames.FILENAME), false /* isDirectory */);

  // Validate the outer coder, then narrow it.
  checkArgument(
      coder instanceof WindowedValueCoder,
      "%s only supports using %s but got %s.",
      IsmReader.class,
      WindowedValueCoder.class,
      coder);
  @SuppressWarnings("unchecked")
  WindowedValueCoder<IsmRecord<V>> outerCoder = (WindowedValueCoder<IsmRecord<V>>) coder;

  // Validate the value coder, then narrow it.
  checkArgument(
      outerCoder.getValueCoder() instanceof IsmRecordCoder,
      "%s only supports using %s but got %s.",
      IsmReader.class,
      IsmRecordCoder.class,
      outerCoder.getValueCoder());
  @SuppressWarnings("unchecked")
  final IsmRecordCoder<V> recordCoder = (IsmRecordCoder<V>) outerCoder.getValueCoder();

  // Validate the execution context, then narrow it.
  checkArgument(
      executionContext instanceof BatchModeExecutionContext,
      "%s only supports using %s but got %s.",
      IsmReader.class,
      BatchModeExecutionContext.class,
      executionContext);
  final BatchModeExecutionContext batchContext = (BatchModeExecutionContext) executionContext;

  // Look the reader up in the logical reference cache, keyed by the file's string form, so
  // that a reader already created in this JVM for the same file is reused rather than
  // rebuilt. This saves initialization costs across work items that access the same file.
  // The loader below, including the data cache lookup, only runs when a reader is created.
  return batchContext
      .<IsmReaderKey, NativeReader<?>>getLogicalReferenceCache()
      .get(
          new IsmReaderKey(ismFile.toString()),
          () ->
              new IsmReaderImpl<V>(
                  ismFile,
                  recordCoder,
                  batchContext
                      .<IsmReaderImpl.IsmShardKey,
                          WeightedValue<
                              NavigableMap<RandomAccessData, WindowedValue<IsmRecord<V>>>>>
                          getDataCache()));
}
 
Example 19
Source File: ResourceIdCoder.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes a {@link ResourceId} by reading its string spec followed by its directory flag, then
 * matching a new resource from the two.
 *
 * @param is the stream to read from
 * @return the decoded resource id
 * @throws IOException if either component cannot be decoded
 */
@Override
public ResourceId decode(InputStream is) throws IOException {
  // Order matters: the spec is read before the directory flag.
  final String encodedSpec = STRING_CODER.decode(is);
  final boolean directoryFlag = BOOL_CODER.decode(is);
  return FileSystems.matchNewResource(encodedSpec, directoryFlag);
}
 
Example 20
Source File: RequiresStableInputIT.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes {@code text}, encoded as UTF-8, to the file named by {@code filename}, creating it with
 * the {@code text/plain} MIME type.
 *
 * @param text the text to write
 * @param filename the path of the file to create; resolved as a file, not a directory
 * @throws IOException if the file cannot be created or written
 */
public static void writeTextToFileSideEffect(String text, String filename) throws IOException {
  ResourceId rid = FileSystems.matchNewResource(filename, false);
  ByteBuffer payload = ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8));
  // try-with-resources closes the channel even when a write fails.
  try (WritableByteChannel chan = FileSystems.create(rid, "text/plain")) {
    // A single write() call may consume only part of the buffer, so drain it explicitly.
    while (payload.hasRemaining()) {
      chan.write(payload);
    }
  }
}