Java Code Examples for org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath#fromUri()

The following examples show how to use org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath#fromUri(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GcsUtil.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one {@link RewriteOp} per (source, destination) pair, pairing the two inputs by
 * position.
 *
 * <p>Both inputs are first copied into lists so that their sizes can be compared; the argument
 * check rejects inputs of different length before any path is parsed.
 *
 * @param srcFilenames source locations, each parsed with {@code GcsPath.fromUri}
 * @param destFilenames destination locations, each parsed with {@code GcsPath.fromUri}
 * @return the rewrite operations, in the same order as the inputs
 * @throws IOException declared by the signature; presumably raised while constructing a
 *     {@link RewriteOp} (confirm against that class)
 */
LinkedList<RewriteOp> makeRewriteOps(
    Iterable<String> srcFilenames, Iterable<String> destFilenames) throws IOException {
  List<String> sources = Lists.newArrayList(srcFilenames);
  List<String> destinations = Lists.newArrayList(destFilenames);
  checkArgument(
      sources.size() == destinations.size(),
      "Number of source files %s must equal number of destination files %s",
      sources.size(),
      destinations.size());

  LinkedList<RewriteOp> ops = Lists.newLinkedList();
  int pairCount = sources.size();
  int index = 0;
  while (index < pairCount) {
    // Arguments are evaluated left to right: source is parsed before destination.
    ops.add(
        new RewriteOp(
            GcsPath.fromUri(sources.get(index)), GcsPath.fromUri(destinations.get(index))));
    index++;
  }
  return ops;
}
 
Example 2
Source File: GcsUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code GcsUtil.expand} returns an empty list when the object lookup for a
 * non-glob path fails with a "not found" response.
 */
@Test
public void testNonExistentObjectReturnsEmptyResult() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage -> objects() -> get(bucket, object) -> execute().
  Storage storage = Mockito.mock(Storage.class);
  Storage.Objects objects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  gcsUtil.setStorageClient(storage);

  GcsPath missingPath = GcsPath.fromUri("gs://testbucket/testdirectory/nonexistentfile");
  GoogleJsonResponseException notFound =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_NOT_FOUND, "It don't exist", "Nothing here to see");

  when(storage.objects()).thenReturn(objects);
  when(objects.get(missingPath.getBucket(), missingPath.getObject())).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(notFound);

  assertEquals(Collections.emptyList(), gcsUtil.expand(missingPath));
}
 
Example 3
Source File: DLPTemplateHelper.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the object addressed by {@code gcsPath} and returns its content decoded as UTF-8.
 *
 * @param gcsPath URI string of the object; its bucket and object name are used for the lookup
 * @return the full object content as a string
 */
public static String getKekDetails(String gcsPath) {
  GcsPath location = GcsPath.fromUri(URI.create(gcsPath));
  Storage client = StorageOptions.getDefaultInstance().getService();
  byte[] raw = client.readAllBytes(BlobId.of(location.getBucket(), location.getObject()));
  return new String(raw, UTF_8);
}
 
Example 4
Source File: DLPTemplateHelper.java    From dlp-dataflow-deidentification with Apache License 2.0 5 votes vote down vote up
/**
 * Uploads {@code contents} (UTF-8 encoded, content type {@code application/json}) as an object
 * named {@code fileName} in the bucket of {@code gcsPath}.
 *
 * <p>Only the bucket part of {@code gcsPath} is used; the object part of the path is ignored and
 * {@code fileName} is used as the object name instead.
 *
 * @param contents text to upload
 * @param gcsPath URI string from which the target bucket is taken
 * @param fileName name of the object to create
 * @return the id of the created blob
 */
public static BlobId uploadConfig(String contents, String gcsPath, String fileName) {
  GcsPath location = GcsPath.fromUri(URI.create(gcsPath));
  Storage client = StorageOptions.getDefaultInstance().getService();
  BlobInfo jsonBlob =
      BlobInfo.newBuilder(BlobId.of(location.getBucket(), fileName))
          .setContentType("application/json")
          .build();
  return client.create(jsonBlob, contents.getBytes(UTF_8)).getBlobId();
}
 
Example 5
Source File: GcsFileSystem.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a single resource spec into a {@link GcsResourceId}.
 *
 * <p>For a directory, a trailing {@code '/'} is appended when missing. For a file, a spec ending
 * in {@code '/'} is rejected by the argument check.
 *
 * @param singleResourceSpec the resource spec to parse with {@code GcsPath.fromUri}
 * @param isDirectory whether the spec denotes a directory
 * @return the resource id for the (possibly slash-terminated) spec
 */
@Override
protected GcsResourceId matchNewResource(String singleResourceSpec, boolean isDirectory) {
  boolean hasTrailingSlash = singleResourceSpec.endsWith("/");
  if (!isDirectory) {
    checkArgument(
        !hasTrailingSlash,
        "Expected a file path, but [%s], ends with '/'. This is unsupported in GcsFileSystem.",
        singleResourceSpec);
  } else if (!hasTrailingSlash) {
    singleResourceSpec += "/";
  }
  return GcsResourceId.fromGcsPath(GcsPath.fromUri(singleResourceSpec));
}
 
Example 6
Source File: GcsPathValidator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code path} with {@code GcsPath.fromUri}, rewrapping a parse failure in an
 * {@link IllegalArgumentException} whose message names the offending path.
 *
 * @param path the candidate path string
 * @return the parsed path
 * @throws IllegalArgumentException if {@code GcsPath.fromUri} rejects the path; the original
 *     exception is kept as the cause
 */
private GcsPath getGcsPath(String path) {
  final GcsPath parsed;
  try {
    parsed = GcsPath.fromUri(path);
  } catch (IllegalArgumentException cause) {
    String message = String.format("Expected a valid 'gs://' path but was given '%s'", path);
    throw new IllegalArgumentException(message, cause);
  }
  return parsed;
}
 
Example 7
Source File: GcsUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code GcsUtil.expand} surfaces an {@link IOException} with the expected message
 * when the object lookup fails with a "forbidden" response.
 */
@Test
public void testAccessDeniedObjectThrowsIOException() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage -> objects() -> get(bucket, object) -> execute().
  Storage storage = Mockito.mock(Storage.class);
  Storage.Objects objects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  gcsUtil.setStorageClient(storage);

  GcsPath deniedPath = GcsPath.fromUri("gs://testbucket/testdirectory/accessdeniedfile");
  GoogleJsonResponseException forbidden =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_FORBIDDEN,
          "Waves hand mysteriously",
          "These aren't the buckets you're looking for");

  when(storage.objects()).thenReturn(objects);
  when(objects.get(deniedPath.getBucket(), deniedPath.getObject())).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(forbidden);

  // Expectations must be registered before the call that is expected to throw.
  thrown.expect(IOException.class);
  thrown.expectMessage("Unable to get the file object for path");
  gcsUtil.expand(deniedPath);
}
 
Example 8
Source File: GcsFileSystemTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link StorageObject} whose bucket and name come from {@code gcsFilename} and whose
 * size is {@code fileSize}, with a size of zero represented as {@code null}.
 *
 * @param gcsFilename location string parsed with {@code GcsPath.fromUri}
 * @param fileSize size to record on the object
 * @return the populated storage object
 */
private StorageObject createStorageObject(String gcsFilename, long fileSize) {
  GcsPath parsed = GcsPath.fromUri(gcsFilename);

  // Google APIs will use null for empty files.
  @Nullable BigInteger reportedSize;
  if (fileSize == 0) {
    reportedSize = null;
  } else {
    reportedSize = BigInteger.valueOf(fileSize);
  }

  StorageObject result = new StorageObject();
  result.setBucket(parsed.getBucket());
  result.setName(parsed.getObject());
  result.setSize(reportedSize);
  return result;
}
 
Example 9
Source File: BatchLoads.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the temp location that will be used for the BigQuery load job.
 *
 * <p>The location is {@code customGcsTempLocation} when it is set, otherwise the pipeline's
 * {@code tempLocation} option. If a custom location is set but not yet accessible, validation is
 * skipped entirely. The location must be non-empty, and when {@code bigQueryServices} is
 * {@code null} it must additionally parse as a GCS path.
 *
 * @param options pipeline options supplying the fallback {@code tempLocation}
 * @throws IllegalArgumentException if the temp location is null or empty, or (when checked) is
 *     not accepted by {@code GcsPath.fromUri}
 */
@Override
public void validate(PipelineOptions options) {
  // We will use a BigQuery load job -- validate the temp location.
  String tempLocation;
  if (customGcsTempLocation == null) {
    tempLocation = options.getTempLocation();
  } else {
    if (!customGcsTempLocation.isAccessible()) {
      // Can't perform verification in this case.
      return;
    }
    tempLocation = customGcsTempLocation.get();
  }
  // The literals are concatenated verbatim, so each one must carry its own separating space;
  // without them the message reads "...temp files.This can be set ... in the Builderor through...".
  checkArgument(
      !Strings.isNullOrEmpty(tempLocation),
      "BigQueryIO.Write needs a GCS temp location to store temp files. "
          + "This can be set by withCustomGcsTempLocation() in the Builder "
          + "or through the fallback pipeline option --tempLocation.");
  // The GCS path syntax is only checked when no BigQueryServices instance has been supplied.
  if (bigQueryServices == null) {
    try {
      GcsPath.fromUri(tempLocation);
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException(
          String.format(
              "BigQuery temp location expected a valid 'gs://' path, but was given '%s'",
              tempLocation),
          e);
    }
  }
}
 
Example 10
Source File: PackageUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link StorageObject} whose bucket and name come from {@code gcsFilename} and whose
 * size is {@code fileSize}.
 *
 * @param gcsFilename location string parsed with {@code GcsPath.fromUri}
 * @param fileSize size to record on the object
 * @return the populated storage object
 */
private StorageObject createStorageObject(String gcsFilename, long fileSize) {
  GcsPath parsed = GcsPath.fromUri(gcsFilename);

  StorageObject result = new StorageObject();
  result.setBucket(parsed.getBucket());
  result.setName(parsed.getObject());
  result.setSize(BigInteger.valueOf(fileSize));
  return result;
}
 
Example 11
Source File: GcsUtilTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that expanding the recursive glob {@code gs://testbucket/test/**}{@code /*.txt} against
 * a mocked object listing yields exactly the {@code .txt} objects under {@code test/}, in
 * listing order.
 */
@Test
public void testRecursiveGlobExpansion() throws IOException {
  GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
  GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

  Storage mockStorage = Mockito.mock(Storage.class);
  gcsUtil.setStorageClient(mockStorage);

  Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);
  Storage.Objects.List mockStorageList = Mockito.mock(Storage.Objects.List.class);

  Objects modelObjects = new Objects();
  List<StorageObject> items = new ArrayList<>();
  // A directory
  items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));

  // Files within the directory
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file1.txt"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file2.txt"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file3.txt"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/anotherfile"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/file4.txt"));

  modelObjects.setItems(items);

  when(mockStorage.objects()).thenReturn(mockStorageObjects);
  when(mockStorageObjects.get("testbucket", "test/directory/otherfile"))
      .thenReturn(mockStorageGet);
  when(mockStorageObjects.list("testbucket")).thenReturn(mockStorageList);
  when(mockStorageGet.execute())
      .thenReturn(
          new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
  when(mockStorageList.execute()).thenReturn(modelObjects);

  GcsPath pattern = GcsPath.fromUri("gs://testbucket/test/**/*.txt");
  List<GcsPath> expectedFiles =
      ImmutableList.of(
          GcsPath.fromUri("gs://testbucket/test/directory/file1.txt"),
          GcsPath.fromUri("gs://testbucket/test/directory/file2.txt"),
          GcsPath.fromUri("gs://testbucket/test/directory/file3.txt"),
          GcsPath.fromUri("gs://testbucket/test/file4.txt"));

  // Actual value first, matcher built from the expectation second, so that a failure report
  // labels "Expected" and "but was" the right way round.
  assertThat(gcsUtil.expand(pattern), contains(expectedFiles.toArray()));
}