Java Code Examples for org.apache.beam.sdk.io.fs.MatchResult#Metadata

The following examples show how to use org.apache.beam.sdk.io.fs.MatchResult#Metadata. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FileIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether a matched resource must be skipped because it is a directory.
 *
 * @param metadata match metadata whose resource id is inspected
 * @param directoryTreatment policy applied when the resource turns out to be a directory
 * @return {@code true} if the resource is a directory and the treatment is {@code SKIP};
 *     {@code false} if the resource is not a directory.
 * @throws java.lang.IllegalArgumentException if the resource is a directory and the treatment
 *     is {@code PROHIBIT}.
 * @throws java.lang.UnsupportedOperationException if the resource is a directory and the
 *     treatment is neither {@code SKIP} nor {@code PROHIBIT}.
 */
static boolean shouldSkipDirectory(
    MatchResult.Metadata metadata, DirectoryTreatment directoryTreatment) {
  // Regular files are never skipped, whatever the treatment is.
  if (!metadata.resourceId().isDirectory()) {
    return false;
  }

  switch (directoryTreatment) {
    case SKIP:
      return true;
    case PROHIBIT:
      throw new IllegalArgumentException(
          "Trying to read " + metadata.resourceId() + " which is a directory");
    default:
      throw new UnsupportedOperationException(
          "Unknown DirectoryTreatment: " + directoryTreatment);
  }
}
 
Example 2
Source File: FileIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code ReadableFile} from match metadata. Callers should first verify that {@link
 * #shouldSkipDirectory(org.apache.beam.sdk.io.fs.MatchResult.Metadata,
 * org.apache.beam.sdk.io.FileIO.ReadMatches.DirectoryTreatment)} returns false.
 *
 * <p>When {@code compression} is {@code AUTO}, the effective compression is detected from the
 * resource's filename. The returned file's metadata is flagged as read-seek-efficient only if
 * the input metadata was and the effective compression is {@code UNCOMPRESSED}.
 */
static ReadableFile matchToReadableFile(
    MatchResult.Metadata metadata, Compression compression) {

  if (compression == Compression.AUTO) {
    compression = Compression.detect(metadata.resourceId().getFilename());
  }

  boolean seekEfficient =
      metadata.isReadSeekEfficient() && compression == Compression.UNCOMPRESSED;

  MatchResult.Metadata adjustedMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(metadata.resourceId())
          .setSizeBytes(metadata.sizeBytes())
          .setLastModifiedMillis(metadata.lastModifiedMillis())
          .setIsReadSeekEfficient(seekEfficient)
          .build();

  return new ReadableFile(adjustedMetadata, compression);
}
 
Example 3
Source File: AvroTableFileAsMutationsTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/** A seekable file twice the split size must be cut into two shards of one split size each. */
@Test
public void testFileSharding() throws Exception {
  final int splitSize = 10000;
  final int fileSize = splitSize * 2;

  Path path = tmpFolder.newFile("testfile").toPath();
  Files.write(path, new byte[fileSize]);

  MatchResult.Metadata fileMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(fileSize)
          .build();

  PAssert.that(runFileShardingPipeline(fileMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(2));
            for (FileShard shard : shards) {
              String filename = shard.getFile().getMetadata().resourceId().getFilename();
              assertThat(filename, equalTo("testfile"));
              assertThat(shard.getTableName(), equalTo("testtable"));

              long shardLength = shard.getRange().getTo() - shard.getRange().getFrom();
              assertThat(shardLength, equalTo((long) splitSize));
            }
            return null;
          });
  p.run();
}
 
Example 4
Source File: AvroTableFileAsMutationsTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/** A file that is not seek-efficient must stay in one shard covering the whole file. */
@Test
public void testFileShardingNotSeekable() throws Exception {
  final int splitSize = 10000;
  final int fileSize = splitSize * 2;

  Path path = tmpFolder.newFile("testfile").toPath();
  Files.write(path, new byte[fileSize]);

  MatchResult.Metadata fileMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(false)
          .setSizeBytes(fileSize)
          .build();

  PAssert.that(runFileShardingPipeline(fileMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));

            FileShard onlyShard = shards.getFirst();
            String filename = onlyShard.getFile().getMetadata().resourceId().getFilename();
            assertThat(filename, equalTo("testfile"));
            assertThat(onlyShard.getTableName(), equalTo("testtable"));
            assertThat(onlyShard.getRange().getFrom(), equalTo(0L));
            assertThat(onlyShard.getRange().getTo(), equalTo(splitSize * 2L));
            return null;
          });
  p.run();
}
 
Example 5
Source File: AvroTableFileAsMutationsTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/** A seekable file exactly one split size long must produce a single shard. */
@Test
public void testFileShardingNoSharding() throws Exception {
  final int splitSize = 10000;

  Path path = tmpFolder.newFile("testfile").toPath();
  Files.write(path, new byte[splitSize]);

  MatchResult.Metadata fileMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
          .setIsReadSeekEfficient(true)
          .setSizeBytes(splitSize)
          .build();

  PAssert.that(runFileShardingPipeline(fileMetadata, splitSize))
      .satisfies(
          input -> {
            LinkedList<FileShard> shards = Lists.newLinkedList(input);
            assertThat(shards, hasSize(1));

            FileShard onlyShard = shards.getFirst();
            String filename = onlyShard.getFile().getMetadata().resourceId().getFilename();
            assertThat(filename, equalTo("testfile"));
            assertThat(onlyShard.getTableName(), equalTo("testtable"));
            assertThat(onlyShard.getRange().getFrom(), equalTo(0L));
            assertThat(onlyShard.getRange().getTo(), equalTo((long) splitSize));
            return null;
          });
  p.run();
}
 
Example 6
Source File: RecordFileSource.java    From dataflow-opinion-analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code RecordFileSource} for the given file metadata and start/end offsets,
 * passing along this source's {@code coder} and {@code separator}.
 *
 * @param metadata metadata of the file the new source reads
 * @param start start offset handed to the new source
 * @param end end offset handed to the new source
 * @return the newly constructed source
 */
@Override
protected FileBasedSource<T> createForSubrangeOfFile(
    MatchResult.Metadata metadata,
    long start,
    long end) {
  return new RecordFileSource<>(metadata, start, end, coder, separator);
}
 
Example 7
Source File: FileIOTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Test helper that builds the {@code MatchResult.Metadata} expected for a regular (non-directory)
 * file.
 *
 * @param path location of the file; converted to a resource id via its string form
 * @param size value stored as the metadata's size in bytes
 * @param lastModifiedMillis value stored as the metadata's last-modified timestamp
 * @return metadata flagged as read-seek-efficient with the given size and timestamp
 */
private static MatchResult.Metadata metadata(Path path, int size, long lastModifiedMillis) {
  return MatchResult.Metadata.builder()
      .setResourceId(FileSystems.matchNewResource(path.toString(), false /* isDirectory */))
      .setIsReadSeekEfficient(true)
      .setSizeBytes(size)
      .setLastModifiedMillis(lastModifiedMillis)
      .build();
}
 
Example 8
Source File: FileBasedIOITHelper.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Matches the element as a single file spec and deletes every resource the match returned.
 *
 * @param c process context whose element is the spec to match
 * @throws IOException if matching, reading the match metadata, or deleting fails
 */
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
  MatchResult match =
      Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(c.element())));

  // Collect into a set so each resource is handed to delete at most once.
  Set<ResourceId> toDelete = new HashSet<>();
  match.metadata().forEach(entry -> toDelete.add(entry.resourceId()));

  FileSystems.delete(toDelete);
}
 
Example 9
Source File: MatchResultMatcher.java    From beam with Apache License 2.0 5 votes vote down vote up
private MatchResultMatcher(
    MatchResult.Status expectedStatus,
    List<MatchResult.Metadata> expectedMetadata,
    IOException expectedException) {
  this.expectedStatus = checkNotNull(expectedStatus);
  checkArgument((expectedMetadata == null) ^ (expectedException == null));
  this.expectedMetadata = expectedMetadata;
  this.expectedException = expectedException;
}
 
Example 10
Source File: TextSource.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new {@code TextSource} for the given file metadata and start/end offsets, reusing
 * this source's {@code delimiter}.
 *
 * @param metadata metadata of the file the new source reads
 * @param start start offset handed to the new source
 * @param end end offset handed to the new source
 * @return the newly constructed source
 */
@Override
protected FileBasedSource<String> createForSubrangeOfFile(
    MatchResult.Metadata metadata, long start, long end) {
  return new TextSource(metadata, start, end, delimiter);
}
 
Example 11
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Expands this transform by applying a {@code ParDo} of {@code ToReadableFileFn} (constructed
 * with this transform) to the input metadata collection.
 *
 * @param input collection of match metadata
 * @return collection of {@code ReadableFile} produced by the {@code ParDo}
 */
@Override
public PCollection<ReadableFile> expand(PCollection<MatchResult.Metadata> input) {
  return input.apply(ParDo.of(new ToReadableFileFn(this)));
}
 
Example 12
Source File: MatchResultMatcher.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a matcher that expects status {@code OK}, the given metadata list, and no exception.
 *
 * @param expectedMetadata metadata the match result is expected to contain
 * @return the new matcher
 */
static MatchResultMatcher create(List<MatchResult.Metadata> expectedMetadata) {
  return new MatchResultMatcher(MatchResult.Status.OK, expectedMetadata, null);
}
 
Example 13
Source File: ReadableFileCoder.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Decodes a {@code ReadableFile} from the stream: first the metadata (via {@code MetadataCoder}),
 * then the compression, which is read as a var-int index into {@code Compression.values()}.
 *
 * @param is stream to read the encoded file from
 * @return the decoded readable file
 * @throws IOException if the underlying coders fail, or if the decoded compression index does
 *     not correspond to any {@code Compression} constant (e.g. a corrupt or incompatible stream)
 */
@Override
public FileIO.ReadableFile decode(InputStream is) throws IOException {
  MatchResult.Metadata metadata = MetadataCoder.of().decode(is);

  int compressionIndex = VarIntCoder.of().decode(is);
  Compression[] knownCompressions = Compression.values();
  // Reject bad indexes explicitly instead of letting an ArrayIndexOutOfBoundsException escape.
  if (compressionIndex < 0 || compressionIndex >= knownCompressions.length) {
    throw new IOException(
        "Invalid Compression index "
            + compressionIndex
            + "; expected a value in [0, "
            + knownCompressions.length
            + ")");
  }

  return new FileIO.ReadableFile(metadata, knownCompressions[compressionIndex]);
}
 
Example 14
Source File: MatchResultMatcher.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload for a single expected metadata: wraps it in an immutable one-element list
 * and passes that list to {@code create}.
 *
 * @param expectedMetadata the single metadata the match result is expected to contain
 * @return the new matcher
 */
private static MatchResultMatcher create(MatchResult.Metadata expectedMetadata) {
  return create(ImmutableList.of(expectedMetadata));
}
 
Example 15
Source File: FileIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a readable file from its metadata and compression. Both arguments are stored as given,
 * without validation or copying.
 *
 * @param metadata match metadata describing the file
 * @param compression compression associated with the file
 */
ReadableFile(MatchResult.Metadata metadata, Compression compression) {
  this.metadata = metadata;
  this.compression = compression;
}
 
Example 16
Source File: TextSource.java    From DataflowTemplates with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a text source for the given file metadata and start/end offsets.
 *
 * @param metadata metadata of the file to read
 * @param start start offset passed to the superclass
 * @param end end offset passed to the superclass
 * @param delimiter delimiter bytes stored on this source (stored by reference, not copied)
 */
TextSource(MatchResult.Metadata metadata, long start, long end, byte[] delimiter) {
  // NOTE(review): the 1L is presumably the minimum bundle size expected by the superclass
  // constructor -- confirm against FileBasedSource.
  super(metadata, 1L, start, end);
  this.delimiter = delimiter;
}
 
Example 17
Source File: BeamHelper.java    From dbeam with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the whole content of the single file matching {@code fileSpec} as a UTF-8 string.
 *
 * <p>The underlying stream is closed before returning, whether the read succeeds or fails.
 *
 * @param fileSpec file spec that is resolved with {@code FileSystems.matchSingleFileSpec}
 * @return the file content decoded as UTF-8
 * @throws IOException if the spec cannot be matched or the file cannot be opened or read
 */
public static String readFromFile(final String fileSpec) throws IOException {
  MatchResult.Metadata m = FileSystems.matchSingleFileSpec(fileSpec);
  // try-with-resources: closing the stream also releases the channel it wraps.
  try (InputStream inputStream = Channels.newInputStream(FileSystems.open(m.resourceId()));
      InputStreamReader reader = new InputStreamReader(inputStream, Charsets.UTF_8)) {
    return CharStreams.toString(reader);
  }
}
 
Example 18
Source File: BeamJdbcAvroSchema.java    From dbeam with Apache License 2.0 4 votes vote down vote up
/**
 * Parses an Avro schema from the single file matching {@code filename}.
 *
 * <p>The underlying stream is closed before returning, whether parsing succeeds or fails.
 *
 * @param filename file spec that is resolved with {@code FileSystems.matchSingleFileSpec}
 * @return the parsed schema
 * @throws IOException if the spec cannot be matched or the file cannot be opened or read
 */
public static Schema parseInputAvroSchemaFile(final String filename) throws IOException {
  MatchResult.Metadata m = FileSystems.matchSingleFileSpec(filename);
  // try-with-resources: closing the stream also releases the channel it wraps.
  try (InputStream inputStream = Channels.newInputStream(FileSystems.open(m.resourceId()))) {
    return new Schema.Parser().parse(inputStream);
  }
}
 
Example 19
Source File: BigQuerySourceBase.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Bundles a table schema with a list of extracted files and a list of match metadata. The
 * arguments are stored as given, without validation or copying.
 *
 * @param schema table schema to store
 * @param extractedFiles resource ids of the extracted files
 * @param metadata match metadata to store
 */
public ExtractResult(
    TableSchema schema, List<ResourceId> extractedFiles, List<MatchResult.Metadata> metadata) {
  this.schema = schema;
  this.extractedFiles = extractedFiles;
  this.metadata = metadata;
}
 
Example 20
Source File: FileIOTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises {@code FileIO.match()} and {@code FileIO.matchAll()} against two existing files and
 * against non-existing paths under the different empty-match treatments.
 */
@Test
@Category(NeedsRunner.class)
public void testMatchAndMatchAll() throws IOException {
  // Fixture: two files with known sizes and last-modified timestamps.
  final int firstSize = 37;
  final int secondSize = 42;
  final long firstModified = 1541097000L;
  final long secondModified = 1541098000L;

  Path firstPath = tmpFolder.newFile("first").toPath();
  Path secondPath = tmpFolder.newFile("second").toPath();
  Files.write(firstPath, new byte[firstSize]);
  Files.write(secondPath, new byte[secondSize]);
  Files.setLastModifiedTime(firstPath, FileTime.fromMillis(firstModified));
  Files.setLastModifiedTime(secondPath, FileTime.fromMillis(secondModified));
  MatchResult.Metadata firstMetadata = metadata(firstPath, firstSize, firstModified);
  MatchResult.Metadata secondMetadata = metadata(secondPath, secondSize, secondModified);

  // File patterns shared by the assertions below.
  String rootDir = tmpFolder.getRoot().getAbsolutePath();
  String everything = rootDir + "/*";
  String missingExact = rootDir + "/blah";
  String missingWildcard = rootDir + "/blah*";

  // Existing files are found by every way of supplying the pattern.
  PAssert.that(p.apply("Match existing", FileIO.match().filepattern(everything)))
      .containsInAnyOrder(firstMetadata, secondMetadata);
  PAssert.that(
          p.apply(
              "Match existing with provider",
              FileIO.match().filepattern(p.newProvider(everything))))
      .containsInAnyOrder(firstMetadata, secondMetadata);
  PAssert.that(
          p.apply("Create existing", Create.of(everything))
              .apply("MatchAll existing", FileIO.matchAll()))
      .containsInAnyOrder(firstMetadata, secondMetadata);

  // Non-existing exact path with ALLOW yields an empty result.
  PAssert.that(
          p.apply(
              "Match non-existing ALLOW",
              FileIO.match()
                  .filepattern(missingExact)
                  .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW)))
      .containsInAnyOrder();
  PAssert.that(
          p.apply("Create non-existing", Create.of(missingExact))
              .apply(
                  "MatchAll non-existing ALLOW",
                  FileIO.matchAll().withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW)))
      .containsInAnyOrder();

  // Non-existing wildcard pattern yields an empty result, explicitly and by default.
  PAssert.that(
          p.apply(
              "Match non-existing ALLOW_IF_WILDCARD",
              FileIO.match()
                  .filepattern(missingWildcard)
                  .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD)))
      .containsInAnyOrder();
  PAssert.that(
          p.apply("Create non-existing wildcard + explicit", Create.of(missingWildcard))
              .apply(
                  "MatchAll non-existing ALLOW_IF_WILDCARD",
                  FileIO.matchAll()
                      .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW_IF_WILDCARD)))
      .containsInAnyOrder();
  PAssert.that(
          p.apply("Create non-existing wildcard + default", Create.of(missingWildcard))
              .apply("MatchAll non-existing default", FileIO.matchAll()))
      .containsInAnyOrder();

  p.run();
}