org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath Java Examples

The following examples show how to use org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PackageUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that staging succeeds when the first upload attempt fails.
 *
 * <p>{@code create} is stubbed to throw an {@link IOException} once and then hand back a pipe
 * sink, so the test expects one {@code getObjects} call and exactly two {@code create} calls.
 */
@Test
public void testPackageUploadEventuallySucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();
  File sourceFile = makeFileWithContents("file.txt", "This is a test!");

  // The lookup returns a single entry that wraps a FileNotFoundException.
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
  // Attempt one throws; attempt two receives the writable end of the pipe.
  when(mockGcsUtil.create(any(GcsPath.class), anyString()))
      .thenThrow(new IOException("Fake Exception: 410 Gone"))
      .thenReturn(uploadPipe.sink());

  try (PackageUtil stager =
      PackageUtil.withExecutorService(MoreExecutors.newDirectExecutorService())) {
    stager.stageClasspathElements(
        ImmutableList.of(makeStagedFile(sourceFile.getAbsolutePath())),
        STAGING_PATH,
        fastNanoClockAndSleeper,
        createOptions);
  } finally {
    // Interaction checks run whether or not staging threw.
    verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
    verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString());
    verifyNoMoreInteractions(mockGcsUtil);
  }
}
 
Example #2
Source File: GcsUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code fileSize} raises {@link FileNotFoundException} when the storage client is
 * backed by a transport built around an HTTP 404 response.
 */
@Test
public void testFileSizeWhenFileNotFoundNonBatch() throws Exception {
  // A canned 404 response with an empty body.
  MockLowLevelHttpResponse response404 =
      new MockLowLevelHttpResponse()
          .setContent("")
          .setStatusCode(HttpStatusCodes.STATUS_CODE_NOT_FOUND);
  MockHttpTransport transport =
      new MockHttpTransport.Builder().setLowLevelHttpResponse(response404).build();

  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();
  gcsUtil.setStorageClient(new Storage(transport, Transport.getJsonFactory(), null));

  thrown.expect(FileNotFoundException.class);
  gcsUtil.fileSize(GcsPath.fromComponents("testbucket", "testobject"));
}
 
Example #3
Source File: GcsUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** Checks that {@code fileSize} reports the size carried by the mocked object-get response. */
@Test
public void testFileSizeNonBatch() throws Exception {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage.objects().get(bucket, object).execute()
  Storage storage = Mockito.mock(Storage.class);
  Storage.Objects objectsApi = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  gcsUtil.setStorageClient(storage);

  when(storage.objects()).thenReturn(objectsApi);
  when(objectsApi.get("testbucket", "testobject")).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(new StorageObject().setSize(BigInteger.valueOf(1000)));

  long reportedSize = gcsUtil.fileSize(GcsPath.fromComponents("testbucket", "testobject"));
  assertEquals(1000, reportedSize);
}
 
Example #4
Source File: PackageUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a directory is still uploaded when the staged object reports a different size.
 *
 * <p>{@code getObjects} is stubbed with a storage object of size {@code Long.MAX_VALUE}, and the
 * test expects exactly one {@code getObjects} call and one {@code create} call.
 */
@Test
public void testPackageUploadIsNotSkippedWhenSizesAreDifferent() throws Exception {
  Pipe uploadPipe = Pipe.open();

  // Lay out a small directory tree: one empty sub-directory and two files.
  File stagedDir = tmpFolder.newFolder("folder");
  tmpFolder.newFolder("folder", "empty_directory");
  tmpFolder.newFolder("folder", "directory");
  makeFileWithContents("folder/file.txt", "This is a test!");
  makeFileWithContents("folder/directory/file.txt", "This is also a test!");

  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              StorageObjectOrIOException.create(
                  createStorageObject(STAGING_PATH, Long.MAX_VALUE))));
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  defaultPackageUtil.stageClasspathElements(
      ImmutableList.of(makeStagedFile(stagedDir.getAbsolutePath())), STAGING_PATH, createOptions);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);
}
 
Example #5
Source File: GcsUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code getBucket} returns a non-null bucket when the first {@code execute()} call
 * throws a {@link SocketTimeoutException} and the second one returns a {@code Bucket}.
 */
@Test
public void testGetBucket() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage.buckets().get(bucket).execute()
  Storage storage = Mockito.mock(Storage.class);
  Storage.Buckets bucketsApi = Mockito.mock(Storage.Buckets.class);
  Storage.Buckets.Get getRequest = Mockito.mock(Storage.Buckets.Get.class);
  gcsUtil.setStorageClient(storage);

  when(storage.buckets()).thenReturn(bucketsApi);
  when(bucketsApi.get("testbucket")).thenReturn(getRequest);
  when(getRequest.execute())
      .thenThrow(new SocketTimeoutException("SocketException"))
      .thenReturn(new Bucket());

  // A real (non-mock) backoff built from FluentBackoff.DEFAULT.
  BackOff backOff = BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.backoff());
  GcsPath path = GcsPath.fromComponents("testbucket", "testobject");

  assertNotNull(gcsUtil.getBucket(path, backOff, new FastNanoClockAndSleeper()));
}
 
Example #6
Source File: GcsUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code expand} yields an empty list when fetching the object fails with a 404
 * {@code GoogleJsonResponseException}.
 */
@Test
public void testNonExistentObjectReturnsEmptyResult() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage.objects().get(bucket, object).execute()
  Storage storage = Mockito.mock(Storage.class);
  Storage.Objects objectsApi = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  gcsUtil.setStorageClient(storage);

  GcsPath missingPath = GcsPath.fromUri("gs://testbucket/testdirectory/nonexistentfile");
  GoogleJsonResponseException notFound =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_NOT_FOUND, "It don't exist", "Nothing here to see");

  when(storage.objects()).thenReturn(objectsApi);
  when(objectsApi.get(missingPath.getBucket(), missingPath.getObject())).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(notFound);

  assertEquals(Collections.emptyList(), gcsUtil.expand(missingPath));
}
 
Example #7
Source File: GcsUtil.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Pairs each source URI with the destination URI at the same position and wraps every pair in a
 * {@code RewriteOp}.
 *
 * @param srcFilenames source URIs, parsed with {@code GcsPath.fromUri}
 * @param destFilenames destination URIs, parsed with {@code GcsPath.fromUri}
 * @return the rewrite operations, in input order
 * @throws IllegalArgumentException if the two inputs contain a different number of elements
 */
LinkedList<RewriteOp> makeRewriteOps(
    Iterable<String> srcFilenames, Iterable<String> destFilenames) throws IOException {
  // Copy into lists so sizes can be compared and elements addressed by position.
  List<String> sources = Lists.newArrayList(srcFilenames);
  List<String> destinations = Lists.newArrayList(destFilenames);
  int pairCount = sources.size();
  checkArgument(
      pairCount == destinations.size(),
      "Number of source files %s must equal number of destination files %s",
      pairCount,
      destinations.size());

  LinkedList<RewriteOp> ops = Lists.newLinkedList();
  for (int idx = 0; idx < pairCount; idx++) {
    ops.add(
        new RewriteOp(
            GcsPath.fromUri(sources.get(idx)), GcsPath.fromUri(destinations.get(idx))));
  }
  return ops;
}
 
Example #8
Source File: GcsUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code bucketAccessible} returns false when the bucket-get request fails with a
 * 404 {@code GoogleJsonResponseException}.
 */
@Test
public void testBucketDoesNotExist() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage.buckets().get(bucket).execute()
  Storage storage = Mockito.mock(Storage.class);
  Storage.Buckets bucketsApi = Mockito.mock(Storage.Buckets.class);
  Storage.Buckets.Get getRequest = Mockito.mock(Storage.Buckets.Get.class);
  gcsUtil.setStorageClient(storage);

  when(storage.buckets()).thenReturn(bucketsApi);
  when(bucketsApi.get("testbucket")).thenReturn(getRequest);
  when(getRequest.execute())
      .thenThrow(
          googleJsonResponseException(
              HttpStatusCodes.STATUS_CODE_NOT_FOUND, "It don't exist", "Nothing here to see"));

  // A real (non-mock) backoff built from FluentBackoff.DEFAULT.
  BackOff backOff = BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.backoff());
  GcsPath path = GcsPath.fromComponents("testbucket", "testobject");

  assertFalse(gcsUtil.bucketAccessible(path, backOff, new FastNanoClockAndSleeper()));
}
 
Example #9
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds {@link DataflowPipelineOptions} for tests, wired to a mocked {@link GcsUtil} and a mock
 * Dataflow client, and installs them as the default options for {@link FileSystems}.
 */
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  // Mocked GCS: every bucket is reported accessible and expand() echoes its argument back.
  GcsUtil gcsUtil = mock(GcsUtil.class);
  when(gcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);
  when(gcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));

  String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();

  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setJobName("some-job-name");
  pipelineOptions.setProject("some-project");
  pipelineOptions.setRegion("some-region");
  pipelineOptions.setTempLocation(tempLocation);
  pipelineOptions.setFilesToStage(new ArrayList<>());
  pipelineOptions.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  pipelineOptions.setGcsUtil(gcsUtil);

  // Lets the FileSystems API resolve gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(pipelineOptions);

  return pipelineOptions;
}
 
Example #10
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds {@link DataflowPipelineOptions} from command-line style arguments for tests, wired to a
 * mocked {@link GcsUtil}, and installs them as the default options for {@link FileSystems}.
 *
 * @param args arguments handed to {@code PipelineOptionsFactory.fromArgs}
 */
private static DataflowPipelineOptions buildPipelineOptions(String... args) throws IOException {
  // Mocked GCS: every bucket is reported accessible and expand() echoes its argument back.
  GcsUtil gcsUtil = mock(GcsUtil.class);
  when(gcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);
  when(gcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));

  String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();

  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setJobName("some-job-name");
  pipelineOptions.setProject("some-project");
  pipelineOptions.setRegion("some-region");
  pipelineOptions.setTempLocation(tempLocation);
  pipelineOptions.setFilesToStage(new ArrayList<>());
  pipelineOptions.setGcsUtil(gcsUtil);

  // Lets the FileSystems API resolve gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(pipelineOptions);

  return pipelineOptions;
}
 
Example #11
Source File: PackageUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that staging a classpath of 1005 elements logs the warning that mentions the element
 * count.
 */
@Test
public void testPackageUploadWithLargeClasspathLogsWarning() throws Exception {
  File sourceFile = makeFileWithContents("file.txt", "This is a test!");

  // The lookup reports an object whose size equals the local file's length.
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              StorageObjectOrIOException.create(
                  createStorageObject(STAGING_PATH, sourceFile.length()))));

  // The same file is registered 1005 times, each under a distinct element name.
  List<StagedFile> classpath = Lists.newLinkedList();
  int elementIndex = 0;
  while (elementIndex < 1005) {
    classpath.add(makeStagedFile(sourceFile.getAbsolutePath(), "element" + elementIndex));
    elementIndex++;
  }

  defaultPackageUtil.stageClasspathElements(classpath, STAGING_PATH, createOptions);
  logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
}
 
Example #12
Source File: GcsUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code bucketAccessible} returns true when the first {@code execute()} call throws
 * a {@link SocketTimeoutException} and the second one returns a {@code Bucket}.
 */
@Test
public void testBucketAccessible() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage.buckets().get(bucket).execute()
  Storage storage = Mockito.mock(Storage.class);
  Storage.Buckets bucketsApi = Mockito.mock(Storage.Buckets.class);
  Storage.Buckets.Get getRequest = Mockito.mock(Storage.Buckets.Get.class);
  gcsUtil.setStorageClient(storage);

  when(storage.buckets()).thenReturn(bucketsApi);
  when(bucketsApi.get("testbucket")).thenReturn(getRequest);
  when(getRequest.execute())
      .thenThrow(new SocketTimeoutException("SocketException"))
      .thenReturn(new Bucket());

  // A real (non-mock) backoff built from FluentBackoff.DEFAULT.
  BackOff backOff = BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.backoff());
  GcsPath path = GcsPath.fromComponents("testbucket", "testobject");

  assertTrue(gcsUtil.bucketAccessible(path, backOff, new FastNanoClockAndSleeper()));
}
 
Example #13
Source File: MinimalWordCountTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link GcsUtil} mock whose {@code open} hands out a channel over a freshly created
 * temp file and whose {@code expand} returns a one-element list holding the path it was given.
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  GcsUtil gcsUtil = Mockito.mock(GcsUtil.class);

  // expand(): echo the requested GcsPath back in a single-element list. Validation performed
  // by TextIO during apply() requires this.
  Mockito.when(gcsUtil.expand(Mockito.any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));

  // open(): each call gets its own bogus channel backed by a new temp file.
  Mockito.when(gcsUtil.open(Mockito.any(GcsPath.class)))
      .then(
          invocation -> {
            return FileChannel.open(
                Files.createTempFile("channel-", ".tmp"),
                StandardOpenOption.CREATE,
                StandardOpenOption.DELETE_ON_CLOSE);
          });

  return gcsUtil;
}
 
Example #14
Source File: PackageUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the packages returned by staging follow the order of the input classpath: the
 * {@code .txt} file first, then the {@code .log} file.
 */
@Test
public void testStagingPreservesClasspath() throws Exception {
  File txtFile = makeFileWithContents("small.txt", "small");
  File logFile = makeFileWithContents("large.log", "large contents");

  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
  // Every create() call receives the sink of a brand-new pipe.
  when(mockGcsUtil.create(any(GcsPath.class), anyString()))
      .thenAnswer(invocation -> Pipe.open().sink());

  List<DataflowPackage> staged =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(
              makeStagedFile(txtFile.getAbsolutePath()),
              makeStagedFile(logFile.getAbsolutePath())),
          STAGING_PATH,
          createOptions);

  // Results must come back small-then-large, mirroring the input order, even though the large
  // file would be uploaded first.
  DataflowPackage first = staged.get(0);
  DataflowPackage second = staged.get(1);
  assertThat(first.getName(), endsWith(".txt"));
  assertThat(second.getName(), endsWith(".log"));
}
 
Example #15
Source File: FileChecksum.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the MD5 hash of each given GCS file.
 *
 * <p>Any {@link IOException}, whether thrown by the lookup itself or carried inside one of the
 * per-file results, is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param gcsUtil used to fetch the objects
 * @param gcsPaths paths of the files to look up
 * @return the MD5 hashes, in the order the lookup results are returned
 */
public static List<String> getGcsFileChecksums(GcsUtil gcsUtil, List<GcsPath> gcsPaths) {
  List<String> md5Hashes = new ArrayList<>();
  try {
    for (StorageObjectOrIOException result : gcsUtil.getObjects(gcsPaths)) {
      IOException failure = result.ioException();
      if (failure == null) {
        md5Hashes.add(result.storageObject().getMd5Hash());
      } else {
        // Surface the per-file failure through the same wrapping path as a lookup failure.
        throw failure;
      }
    }
  } catch (IOException cause) {
    throw new RuntimeException(cause);
  }
  return md5Hashes;
}
 
Example #16
Source File: ExportTransform.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TableManifest} with one file entry per GCS path, pairing each path's file name
 * with the MD5 hash returned by {@code FileChecksum.getGcsFileChecksums} at the same index.
 */
private TableManifest buildGcsManifest(ProcessContext c, Iterable<GcsPath> files) {
  org.apache.beam.sdk.extensions.gcp.util.GcsUtil gcsUtil =
      c.getPipelineOptions().as(GcsOptions.class).getGcsUtil();

  // Materialize the iterable so paths and checksums can be matched up by position.
  List<GcsPath> pathList = new ArrayList<>();
  for (GcsPath path : files) {
    pathList.add(path);
  }

  // Fetch object metadata from GCS.
  List<String> md5Hashes = FileChecksum.getGcsFileChecksums(gcsUtil, pathList);

  TableManifest.Builder manifest = TableManifest.newBuilder();
  int position = 0;
  for (GcsPath path : pathList) {
    String name = path.getFileName().getObject();
    String md5 = md5Hashes.get(position);
    manifest.addFilesBuilder().setName(name).setMd5(md5);
    position++;
  }
  return manifest.build();
}
 
Example #17
Source File: ExportTransform.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Emits the JSON form of a manifest for the files attached to the current element.
 *
 * <p>Elements whose key equals {@code EMPTY_EXPORT_FILE} are skipped. Only the first file name
 * is tested against {@code GcsPath.GCS_URI}; if it matches, all files are treated as GCS paths,
 * otherwise as local paths. An {@link InvalidProtocolBufferException} from JSON printing is
 * rethrown wrapped in a {@link RuntimeException}.
 */
@ProcessElement
public void processElement(ProcessContext c) {
  if (Objects.equals(c.element().getKey(), EMPTY_EXPORT_FILE)) {
    return;
  }

  Iterable<String> fileNames = c.element().getValue();
  Iterator<String> cursor = fileNames.iterator();
  boolean onGcs = cursor.hasNext() && GcsPath.GCS_URI.matcher(cursor.next()).matches();

  TableManifest manifest;
  if (!onGcs) {
    Iterable<Path> localPaths = Iterables.transform(fileNames, name -> Paths.get(name));
    manifest = buildLocalManifest(localPaths);
  } else {
    Iterable<GcsPath> gcsPaths = Iterables.transform(fileNames, name -> GcsPath.fromUri(name));
    manifest = buildGcsManifest(c, gcsPaths);
  }

  try {
    String json = JsonFormat.printer().print(manifest);
    c.output(KV.of(c.element().getKey(), json));
  } catch (InvalidProtocolBufferException e) {
    throw new RuntimeException(e);
  }
}
 
Example #18
Source File: PackageUtilTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that staging fails with a {@link RuntimeException} when every {@code create} call
 * throws an {@link IOException}.
 *
 * <p>The test expects one {@code getObjects} call and exactly five {@code create} calls.
 */
@Test(expected = RuntimeException.class)
public void testPackageUploadFailsWhenIOExceptionThrown() throws Exception {
  File sourceFile = makeFileWithContents("file.txt", "This is a test!");

  // The lookup returns a single entry that wraps a FileNotFoundException.
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
  when(mockGcsUtil.create(any(GcsPath.class), anyString()))
      .thenThrow(new IOException("Fake Exception: Upload error"));

  try (PackageUtil stager =
      PackageUtil.withExecutorService(MoreExecutors.newDirectExecutorService())) {
    stager.stageClasspathElements(
        ImmutableList.of(makeStagedFile(sourceFile.getAbsolutePath())),
        STAGING_PATH,
        fastNanoClockAndSleeper,
        createOptions);
  } finally {
    // Interaction checks run even though staging is expected to throw.
    verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
    verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
    verifyNoMoreInteractions(mockGcsUtil);
  }
}
 
Example #19
Source File: MinimalWordCountTest.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Creates a {@link GcsUtil} mock whose {@code open} hands out a channel over a freshly created
 * temp file and whose {@code expand} returns a one-element list holding the path it was given.
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  GcsUtil gcsUtil = Mockito.mock(GcsUtil.class);

  // expand(): echo the requested GcsPath back in a single-element list. Validation performed
  // by TextIO during apply() requires this.
  Mockito.when(gcsUtil.expand(Mockito.any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));

  // open(): each call gets its own bogus channel backed by a new temp file.
  Mockito.when(gcsUtil.open(Mockito.any(GcsPath.class)))
      .then(
          invocation -> {
            return FileChannel.open(
                Files.createTempFile("channel-", ".tmp"),
                StandardOpenOption.CREATE,
                StandardOpenOption.DELETE_ON_CLOSE);
          });

  return gcsUtil;
}
 
Example #20
Source File: PackageUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that staging a single file performs one lookup and one upload, returns a package whose
 * name ends in {@code .txt} and whose location is {@code STAGING_PATH} plus that name, and that
 * the first line written to the upload channel equals the file contents.
 */
@Test
public void testPackageUploadWithFileSucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();
  String expectedContents = "This is a test!";
  File sourceFile = makeFileWithContents("file.txt", expectedContents);

  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  List<DataflowPackage> staged =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(makeStagedFile(sourceFile.getAbsolutePath())),
          STAGING_PATH,
          createOptions);
  DataflowPackage onlyPackage = Iterables.getOnlyElement(staged);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  assertThat(onlyPackage.getName(), endsWith(".txt"));
  assertThat(onlyPackage.getLocation(), equalTo(STAGING_PATH + onlyPackage.getName()));

  // Read back what was written into the pipe's sink.
  LineReader uploaded =
      new LineReader(Channels.newReader(uploadPipe.source(), StandardCharsets.UTF_8.name()));
  assertThat(uploaded.readLine(), equalTo(expectedContents));
}
 
Example #21
Source File: GcpOptionsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that resolving the default {@code gcpTempLocation} fails with an
 * {@link IllegalArgumentException} when the mocked {@code bucketAccessible} returns false for
 * the configured temp location.
 */
@Test
public void testDefaultGcpTempLocationDoesNotExist() throws IOException {
  options.setTempLocation("gs://does/not/exist");
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(false);

  // Expected failure: type, message, and the message of the underlying cause.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "Error constructing default value for gcpTempLocation: tempLocation is not"
          + " a valid GCS path");
  thrown.expectCause(
      hasMessage(containsString("Output path does not exist or is not writeable")));

  GcpOptions gcpOptions = options.as(GcpOptions.class);
  gcpOptions.getGcpTempLocation();
}
 
Example #22
Source File: BatchLoads.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the temp location that the BigQuery load job will use.
 *
 * <p>The location is taken from {@code customGcsTempLocation} when one is set, otherwise from
 * {@code options.getTempLocation()}. If a custom location is set but not yet accessible,
 * validation is skipped entirely. When {@code bigQueryServices} is null, the location must also
 * be accepted by {@code GcsPath.fromUri}.
 *
 * @param options pipeline options supplying the fallback {@code tempLocation}
 * @throws IllegalArgumentException if the temp location is null or empty, or if it is checked
 *     and rejected as a GCS path
 */
@Override
public void validate(PipelineOptions options) {
  // We will use a BigQuery load job -- validate the temp location.
  String tempLocation;
  if (customGcsTempLocation == null) {
    tempLocation = options.getTempLocation();
  } else {
    if (!customGcsTempLocation.isAccessible()) {
      // Can't perform verification in this case.
      return;
    }
    tempLocation = customGcsTempLocation.get();
  }
  // Each fragment ends with a space so the concatenated sentences stay separated.
  checkArgument(
      !Strings.isNullOrEmpty(tempLocation),
      "BigQueryIO.Write needs a GCS temp location to store temp files. "
          + "This can be set by withCustomGcsTempLocation() in the Builder "
          + "or through the fallback pipeline option --tempLocation.");
  if (bigQueryServices == null) {
    try {
      // Parsing is used purely as a validity check; the parsed path is discarded.
      GcsPath.fromUri(tempLocation);
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException(
          String.format(
              "BigQuery temp location expected a valid 'gs://' path, but was given '%s'",
              tempLocation),
          e);
    }
  }
}
 
Example #23
Source File: GcsUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code fileSizes} raises {@link FileNotFoundException} when the batch response is
 * an HTTP 200 multipart body whose single part is a 404 carrying a JSON error.
 */
@Test
public void testGetSizeBytesWhenFileNotFoundBatch() throws Exception {
  JsonFactory jsonFactory = new JacksonFactory();

  String boundary = "batch_foobarbaz";
  String partDelimiter = "--" + boundary;
  String closingDelimiter = "--" + boundary + "--";

  GenericJson errorJson = new GenericJson().set("error", new GenericJson().set("code", 404));
  errorJson.setFactory(jsonFactory);

  // Assemble the multipart body piece by piece: delimiter, part headers, embedded HTTP
  // response, closing delimiter.
  StringBuilder body = new StringBuilder();
  body.append(partDelimiter);
  body.append("\n");
  body.append("Content-Type: application/http\n");
  body.append("\n");
  body.append("HTTP/1.1 404 Not Found\n");
  body.append("Content-Length: -1\n");
  body.append("\n");
  body.append(errorJson.toString());
  body.append("\n");
  body.append("\n");
  body.append(closingDelimiter);
  body.append("\n");
  String content = body.toString();

  thrown.expect(FileNotFoundException.class);
  MockLowLevelHttpResponse batchResponse =
      new MockLowLevelHttpResponse()
          .setContentType("multipart/mixed; boundary=" + boundary)
          .setContent(content)
          .setStatusCode(HttpStatusCodes.STATUS_CODE_OK);
  MockHttpTransport transport =
      new MockHttpTransport.Builder().setLowLevelHttpResponse(batchResponse).build();

  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();
  gcsUtil.setStorageClient(new Storage(transport, Transport.getJsonFactory(), null));

  gcsUtil.fileSizes(ImmutableList.of(GcsPath.fromComponents("testbucket", "testobject")));
}
 
Example #24
Source File: GcsUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code getObject} returns the object after two {@link SocketTimeoutException}s,
 * and that a backoff limited to two retries then reports {@code BackOff.STOP}.
 */
@Test
public void testRetryFileSizeNonBatch() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage.objects().get(bucket, object).execute()
  Storage storage = Mockito.mock(Storage.class);
  Storage.Objects objectsApi = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  gcsUtil.setStorageClient(storage);

  when(storage.objects()).thenReturn(objectsApi);
  when(objectsApi.get("testbucket", "testobject")).thenReturn(getRequest);
  // Two failures, then a 1000-byte object.
  when(getRequest.execute())
      .thenThrow(new SocketTimeoutException("SocketException"))
      .thenThrow(new SocketTimeoutException("SocketException"))
      .thenReturn(new StorageObject().setSize(BigInteger.valueOf(1000)));

  // A real (non-mock) backoff capped at two retries.
  BackOff backOff =
      BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.withMaxRetries(2).backoff());

  StorageObject fetched =
      gcsUtil.getObject(
          GcsPath.fromComponents("testbucket", "testobject"),
          backOff,
          new FastNanoClockAndSleeper());

  assertEquals(1000, fetched.getSize().longValue());
  assertEquals(BackOff.STOP, backOff.nextBackOffMillis());
}
 
Example #25
Source File: GcsUtilTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code expand} throws an {@link IOException} with the expected message when
 * fetching the object fails with a 403 {@code GoogleJsonResponseException}.
 */
@Test
public void testAccessDeniedObjectThrowsIOException() throws IOException {
  GcsUtil gcsUtil = gcsOptionsWithTestCredential().getGcsUtil();

  // Mock chain: storage.objects().get(bucket, object).execute()
  Storage storage = Mockito.mock(Storage.class);
  Storage.Objects objectsApi = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get getRequest = Mockito.mock(Storage.Objects.Get.class);
  gcsUtil.setStorageClient(storage);

  GcsPath deniedPath = GcsPath.fromUri("gs://testbucket/testdirectory/accessdeniedfile");
  GoogleJsonResponseException forbidden =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_FORBIDDEN,
          "Waves hand mysteriously",
          "These aren't the buckets you're looking for");

  when(storage.objects()).thenReturn(objectsApi);
  when(objectsApi.get(deniedPath.getBucket(), deniedPath.getObject())).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(forbidden);

  thrown.expect(IOException.class);
  thrown.expectMessage("Unable to get the file object for path");
  gcsUtil.expand(deniedPath);
}
 
Example #26
Source File: GcsUtilIT.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests a rewrite operation that requires multiple API calls (using a continuation token).
 *
 * <p>Copies a sample file with the per-call rewrite limit set to 50 MiB, then expects three
 * rewrite tokens to have been used and the source and destination MD5 hashes to match. The copy
 * is removed at the end.
 */
@Test
public void testRewriteMultiPart() throws IOException {
  TestPipelineOptions pipelineOptions =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  // Using a KMS key is necessary to trigger multi-part rewrites (bucket is created
  // with a bucket default key).
  assertNotNull(pipelineOptions.getTempRoot());
  pipelineOptions.setTempLocation(pipelineOptions.getTempRoot() + "/testRewriteMultiPart");

  GcsOptions gcsOptions = pipelineOptions.as(GcsOptions.class);
  GcsUtil gcsUtil = gcsOptions.getGcsUtil();

  String source = "gs://dataflow-samples/wikipedia_edits/wiki_data-000000000000.json";
  // Timestamped destination name under the GCP temp location.
  String destination =
      gcsOptions.getGcpTempLocation()
          + String.format(
              "/GcsUtilIT-%tF-%<tH-%<tM-%<tS-%<tL.testRewriteMultiPart.copy", new Date());

  gcsUtil.maxBytesRewrittenPerCall = 50L * 1024 * 1024;
  gcsUtil.numRewriteTokensUsed = new AtomicInteger();

  gcsUtil.copy(Lists.newArrayList(source), Lists.newArrayList(destination));

  assertThat(gcsUtil.numRewriteTokensUsed.get(), equalTo(3));
  String sourceMd5 = gcsUtil.getObject(GcsPath.fromUri(source)).getMd5Hash();
  String destinationMd5 = gcsUtil.getObject(GcsPath.fromUri(destination)).getMd5Hash();
  assertThat(sourceMd5, equalTo(destinationMd5));

  gcsUtil.remove(Lists.newArrayList(destination));
}
 
Example #27
Source File: GcpOptionsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code tryCreateDefaultBucket} fails with an {@link IllegalArgumentException} when
 * the mocked bucket owner is {@code 5L}.
 */
@Test
public void testProjectMismatch() throws Exception {
  when(mockGcsUtil.bucketOwner(any(GcsPath.class))).thenReturn(5L);
  doReturn(fakeProject).when(mockGet).execute();

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Bucket owner does not match the project");

  GcpTempLocationFactory.tryCreateDefaultBucket(options, mockCrmClient);
}
 
Example #28
Source File: GcpOptionsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code tryCreateDefaultBucket} fails with a {@link RuntimeException} when looking
 * up the bucket owner throws an {@link IOException}.
 */
@Test
public void testCannotGetBucketOwner() throws Exception {
  when(mockGcsUtil.bucketOwner(any(GcsPath.class))).thenThrow(new IOException("badness"));
  doReturn(fakeProject).when(mockGet).execute();

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to determine the owner");

  GcpTempLocationFactory.tryCreateDefaultBucket(options, mockCrmClient);
}
 
Example #29
Source File: GcpOptionsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code tryCreateDefaultBucket} returns the expected staging location when the
 * mocked bucket owner is {@code 1L}.
 */
@Test
public void testCreateBucket() throws Exception {
  when(mockGcsUtil.bucketOwner(any(GcsPath.class))).thenReturn(1L);
  doReturn(fakeProject).when(mockGet).execute();

  assertEquals(
      "gs://dataflow-staging-us-north1-1/temp/",
      GcpTempLocationFactory.tryCreateDefaultBucket(options, mockCrmClient));
}
 
Example #30
Source File: GcsResourceId.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the string form of the last component of {@code gcsPath}.
 *
 * @return the file name, or {@code null} when the path has at most one name component or has no
 *     file-name component
 */
@Override
@Nullable
public String getFilename() {
  if (gcsPath.getNameCount() > 1) {
    GcsPath lastComponent = gcsPath.getFileName();
    if (lastComponent != null) {
      return lastComponent.toString();
    }
  }
  return null;
}