Java Code Examples for com.google.api.services.storage.Storage#Objects

The following examples show how to use com.google.api.services.storage.Storage#Objects. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
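Before the examples, here is a minimal sketch of how a Storage.Objects handle is obtained and used to read an object's metadata. It is not taken from any of the projects below: the credential setup is omitted and the bucket and object names are placeholders.

// Minimal sketch: build a Storage client, get its Storage.Objects collection, fetch metadata.
// Assumptions: no credentials are attached (a real application would pass an
// HttpRequestInitializer), and "example-bucket" / "path/to/object.txt" are placeholder names.
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.json.gson.GsonFactory;
import com.google.api.services.storage.Storage;
import com.google.api.services.storage.model.StorageObject;

public class StorageObjectsSketch {
  public static void main(String[] args) throws Exception {
    Storage storage = new Storage.Builder(
        GoogleNetHttpTransport.newTrustedTransport(),
        GsonFactory.getDefaultInstance(),
        null /* HttpRequestInitializer; attach credentials here in real code */)
        .setApplicationName("storage-objects-sketch")
        .build();

    // Storage.Objects groups the per-object operations: get, list, compose, delete, ...
    Storage.Objects objects = storage.objects();

    // Fetch metadata for a single object and print its size.
    StorageObject metadata = objects.get("example-bucket", "path/to/object.txt").execute();
    System.out.println("Size in bytes: " + metadata.getSize());
  }
}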
Example 1
Source File: GcsUtilTest.java    From beam with Apache License 2.0
@Test
public void testNonExistentObjectReturnsEmptyResult() throws IOException {
  GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
  GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

  Storage mockStorage = Mockito.mock(Storage.class);
  gcsUtil.setStorageClient(mockStorage);

  Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);

  GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/nonexistentfile");
  GoogleJsonResponseException expectedException =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_NOT_FOUND, "It don't exist", "Nothing here to see");

  when(mockStorage.objects()).thenReturn(mockStorageObjects);
  when(mockStorageObjects.get(pattern.getBucket(), pattern.getObject()))
      .thenReturn(mockStorageGet);
  when(mockStorageGet.execute()).thenThrow(expectedException);

  assertEquals(Collections.emptyList(), gcsUtil.expand(pattern));
}
 
Example 2
Source File: GcsUtilTest.java    From beam with Apache License 2.0
@Test
public void testFileSizeNonBatch() throws Exception {
  GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
  GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

  Storage mockStorage = Mockito.mock(Storage.class);
  gcsUtil.setStorageClient(mockStorage);

  Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);

  when(mockStorage.objects()).thenReturn(mockStorageObjects);
  when(mockStorageObjects.get("testbucket", "testobject")).thenReturn(mockStorageGet);
  when(mockStorageGet.execute())
      .thenReturn(new StorageObject().setSize(BigInteger.valueOf(1000)));

  assertEquals(1000, gcsUtil.fileSize(GcsPath.fromComponents("testbucket", "testobject")));
}
 
Example 3
Source File: CountReads.java    From dataflow-java with Apache License 2.0
private static boolean GCSURLExists(String url) {
  // ensure data is accessible
  try {
    // if we can read the size, then surely we can read the file
    GcsPath fn = GcsPath.fromUri(url);
    Storage.Objects storageClient = GCSOptions.Methods.createStorageClient(pipelineOptions, auth);
    Storage.Objects.Get getter = storageClient.get(fn.getBucket(), fn.getObject());
    StorageObject object = getter.execute();
    BigInteger size = object.getSize();
    return true;
  } catch (Exception x) {
    return false;
  }
}
 
Example 4
Source File: LoadReadsToBigQuery.java    From dataflow-java with Apache License 2.0
private static void checkGcsUrlExists(String url) throws IOException {
  // Ensure data is accessible.
  // If we can read the size, then surely we can read the file.
  GcsPath fn = GcsPath.fromUri(url);
  Storage.Objects storageClient = GCSOptions.Methods.createStorageClient(pipelineOptions, auth);
  Storage.Objects.Get getter = storageClient.get(fn.getBucket(), fn.getObject());
  StorageObject object = getter.execute();
  BigInteger size = object.getSize();
}
 
Example 5
Source File: HeaderInfo.java    From dataflow-java with Apache License 2.0
public static HeaderInfo getHeaderFromBAMFile(Storage.Objects storage, String BAMPath, Iterable<Contig> explicitlyRequestedContigs) throws IOException {
  HeaderInfo result = null;

  // Open and read start of BAM
  LOG.info("Reading header from " + BAMPath);
  final SamReader samReader = BAMIO
      .openBAM(storage, BAMPath, ValidationStringency.DEFAULT_STRINGENCY);
  final SAMFileHeader header = samReader.getFileHeader();
  Contig firstContig = getFirstExplicitContigOrNull(header, explicitlyRequestedContigs);
  if (firstContig == null) {
    final SAMSequenceRecord seqRecord = header.getSequence(0);
    firstContig = new Contig(seqRecord.getSequenceName(), -1, -1);
  }

  LOG.info("Reading first chunk of reads from " + BAMPath);
  final SAMRecordIterator recordIterator = samReader.query(
      firstContig.referenceName, (int)firstContig.start + 1, (int)firstContig.end + 1, false);

  Contig firstShard = null;
  while (recordIterator.hasNext() && result == null) {
    SAMRecord record = recordIterator.next();
    final int alignmentStart = record.getAlignmentStart();
    if (firstShard == null && alignmentStart > firstContig.start &&
        (alignmentStart < firstContig.end || firstContig.end == -1)) {
      firstShard = new Contig(firstContig.referenceName, alignmentStart, alignmentStart);
      LOG.info("Determined first shard to be " + firstShard);
      result = new HeaderInfo(header, firstShard);
    }
  }
  recordIterator.close();
  samReader.close();

  if (result == null) {
    throw new IOException("Did not find reads for the first contig " + firstContig.toString());
  }
  LOG.info("Finished header reading from " + BAMPath);
  return result;
}
 
Example 6
Source File: BAMIOITCase.java    From dataflow-java with Apache License 2.0
@Test
public void openBAMTest() throws IOException {
  GCSOptions popts = PipelineOptionsFactory.create().as(GCSOptions.class);
  final Storage.Objects storageClient = Transport.newStorageClient(popts).build().objects();

  SamReader samReader = BAMIO.openBAM(storageClient, TEST_BAM_FNAME, ValidationStringency.DEFAULT_STRINGENCY);
  SAMRecordIterator iterator = samReader.query("1", 550000, 560000, false);
  int readCount = 0;
  while (iterator.hasNext()) {
    iterator.next();
    readCount++;
  }
  Assert.assertEquals("Unexpected count of unmapped reads",
      EXPECTED_UNMAPPED_READS_COUNT, readCount);
}
 
Example 7
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
private static SeekableStream openIndexForPath(Storage.Objects storageClient, String gcsStoragePath) {
  final String indexPath = gcsStoragePath + ".bai";
  try {
    return new SeekableGCSStream(storageClient, indexPath);
  } catch (IOException ex) {
    LOG.info("No index for " + indexPath);
    // Ignore if there is no bai file
  }
  return null;
}
 
Example 8
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
public static ReaderAndIndex openBAMAndExposeIndex(Storage.Objects storageClient, String gcsStoragePath, ValidationStringency stringency) throws IOException {
  ReaderAndIndex result = new ReaderAndIndex();
  result.index = openIndexForPath(storageClient, gcsStoragePath);
  result.reader = openBAMReader(
      openBAMFile(storageClient, gcsStoragePath, result.index), stringency, false, 0);
  return result;
}
 
Example 9
Source File: SeekableGCSStream.java    From dataflow-java with Apache License 2.0
public SeekableGCSStream(Storage.Objects client, String name) throws IOException {
  LOG.info("Creating SeekableGCSStream: " + name);
  this.client = client;
  object = uriToStorageObject(name);
  get = this.client.get(object.getBucket(), object.getName());
  seek(0);
}
 
Example 10
Source File: GCSOptions.java    From dataflow-java with Apache License 2.0
public static Storage.Objects createStorageClient(GCSOptions gcsOptions,
    OfflineAuth auth) {
  final Storage.Builder storageBuilder = new Storage.Builder(
      gcsOptions.getTransport(),
      gcsOptions.getJsonFactory(),
      null);

  return gcsOptions.getGenomicsFactory()
      .fromOfflineAuth(storageBuilder, auth)
      .build()
      .objects();
}
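As a follow-up, a short sketch of how the client returned by this helper is used elsewhere in this project (compare Examples 3 and 4 above); the bucket and object names here are placeholders, not values from the project.

// Placeholder usage of the returned client; names are illustrative only.
Storage.Objects storageClient = GCSOptions.Methods.createStorageClient(pipelineOptions, auth);
StorageObject object = storageClient.get("example-bucket", "path/to/reads.bam").execute();
BigInteger size = object.getSize();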
 
Example 11
Source File: GcsUtilTest.java    From beam with Apache License 2.0
@Test
public void testRetryFileSizeNonBatch() throws IOException {
  GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
  GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

  Storage mockStorage = Mockito.mock(Storage.class);
  gcsUtil.setStorageClient(mockStorage);

  Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);

  BackOff mockBackOff =
      BackOffAdapter.toGcpBackOff(FluentBackoff.DEFAULT.withMaxRetries(2).backoff());

  when(mockStorage.objects()).thenReturn(mockStorageObjects);
  when(mockStorageObjects.get("testbucket", "testobject")).thenReturn(mockStorageGet);
  when(mockStorageGet.execute())
      .thenThrow(new SocketTimeoutException("SocketException"))
      .thenThrow(new SocketTimeoutException("SocketException"))
      .thenReturn(new StorageObject().setSize(BigInteger.valueOf(1000)));

  assertEquals(
      1000,
      gcsUtil
          .getObject(
              GcsPath.fromComponents("testbucket", "testobject"),
              mockBackOff,
              new FastNanoClockAndSleeper())
          .getSize()
          .longValue());
  assertEquals(BackOff.STOP, mockBackOff.nextBackOffMillis());
}
 
Example 12
Source File: GcsUtilTest.java    From beam with Apache License 2.0
@Test
public void testAccessDeniedObjectThrowsIOException() throws IOException {
  GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
  GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

  Storage mockStorage = Mockito.mock(Storage.class);
  gcsUtil.setStorageClient(mockStorage);

  Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);

  GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/accessdeniedfile");
  GoogleJsonResponseException expectedException =
      googleJsonResponseException(
          HttpStatusCodes.STATUS_CODE_FORBIDDEN,
          "Waves hand mysteriously",
          "These aren't the buckets you're looking for");

  when(mockStorage.objects()).thenReturn(mockStorageObjects);
  when(mockStorageObjects.get(pattern.getBucket(), pattern.getObject()))
      .thenReturn(mockStorageGet);
  when(mockStorageGet.execute()).thenThrow(expectedException);

  thrown.expect(IOException.class);
  thrown.expectMessage("Unable to get the file object for path");
  gcsUtil.expand(pattern);
}
 
Example 13
Source File: ReadBAMTransform.java    From dataflow-java with Apache License 2.0
/**
 * Get reads from a single BAM file by serially reading one shard at a time.
 *
 * This is useful when reads from only a subset of genomic regions are desired.
 *
 * This method is marked as deprecated because getReadsFromBAMFilesSharded offers
 * the same functionality but reads shards in parallel.
 *
 * This method should be removed when https://github.com/googlegenomics/dataflow-java/issues/214
 * is fixed.
 *
 * @param p
 * @param pipelineOptions
 * @param auth
 * @param contigs
 * @param options
 * @param BAMFile
 * @param shardingPolicy
 * @return
 * @throws IOException
 */
@Deprecated
public static PCollection<Read> getReadsFromBAMFileSharded(
    Pipeline p,
    PipelineOptions pipelineOptions,
    OfflineAuth auth,
    List<Contig> contigs,
    ReaderOptions options,
    String BAMFile,
    ShardingPolicy shardingPolicy) throws IOException {
  ReadBAMTransform readBAMSTransform = new ReadBAMTransform(options);
  readBAMSTransform.setAuth(auth);
  final Storage.Objects storage = Transport
      .newStorageClient(pipelineOptions.as(GCSOptions.class)).build().objects();
  final List<BAMShard> shardsList = Sharder.shardBAMFile(storage, BAMFile, contigs,
      shardingPolicy);
  PCollection<BAMShard> shards = p.apply(Create
      .of(shardsList));
  return readBAMSTransform.expand(shards);
}
 
Example 14
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
public static SamReader openBAM(Storage.Objects storageClient, String gcsStoragePath,
    ValidationStringency stringency, boolean includeFileSource) throws IOException {
  return openBAMReader(openBAMFile(storageClient, gcsStoragePath,
      openIndexForPath(storageClient, gcsStoragePath)), stringency, includeFileSource, 0);
}
 
Example 15
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
public static SamReader openBAM(Storage.Objects storageClient, String gcsStoragePath,
    ValidationStringency stringency, boolean includeFileSource, long offset) throws IOException {
  return openBAMReader(openBAMFile(storageClient, gcsStoragePath,
      null), stringency, includeFileSource, offset);
}
 
Example 16
Source File: BAMIO.java    From dataflow-java with Apache License 2.0
public static SamReader openBAM(Storage.Objects storageClient, String gcsStoragePath, ValidationStringency stringency) throws IOException {
  return openBAM(storageClient, gcsStoragePath, stringency, false);
}
 
Example 17
Source File: GCSOptions.java    From dataflow-java with Apache License 2.0
public static Storage.Objects createStorageClient(
    DoFn<?, ?>.StartBundleContext context, OfflineAuth auth) {
  final GCSOptions gcsOptions =
      context.getPipelineOptions().as(GCSOptions.class);
  return createStorageClient(gcsOptions, auth);
}
 
Example 18
Source File: IntegrationTestHelper.java    From dataflow-java with Apache License 2.0
/**
 * Open test output as a BAM file - useful if your test writes out a BAM file
 * and you want to validate the contents.
 * @throws IOException
 */
public SamReader openBAM(String bamFilePath) throws IOException {
  final GcsOptions gcsOptions = popts.as(GcsOptions.class);
  final Storage.Objects storage = Transport.newStorageClient(gcsOptions).build().objects();
  return BAMIO.openBAM(storage, bamFilePath, ValidationStringency.LENIENT, true);
}
 
Example 19
Source File: CombineShardsFn.java    From dataflow-java with Apache License 2.0
String composeAndCleanUpShards(
    Storage.Objects storage, List<String> shardNames, String dest) throws IOException {
  LOG.info("Combining shards into " + dest);

  final GcsPath destPath = GcsPath.fromUri(dest);

  StorageObject destination = new StorageObject().setContentType(FILE_MIME_TYPE);

  ArrayList<SourceObjects> sourceObjects = new ArrayList<SourceObjects>();
  int addedShardCount = 0;
  for (String shard : shardNames) {
    final GcsPath shardPath = GcsPath.fromUri(shard);
    LOG.info("Adding shard " + shardPath + " for result " + dest);
    sourceObjects.add(new SourceObjects().setName(shardPath.getObject()));
    addedShardCount++;
  }
  LOG.info("Added " + addedShardCount + " shards for composition");
  Metrics.counter(CombineShardsFn.class, "Files to combine").inc(addedShardCount);

  final ComposeRequest composeRequest =
      new ComposeRequest().setDestination(destination).setSourceObjects(sourceObjects);
  final Compose compose =
      storage.compose(destPath.getBucket(), destPath.getObject(), composeRequest);
  final StorageObject result = compose.execute();
  final String combineResult = GcsPath.fromObject(result).toString();
  LOG.info("Combine result is " + combineResult);
  Metrics.counter(CombineShardsFn.class, "Files combined").inc(addedShardCount);
  Metrics.counter(CombineShardsFn.class, "Files created").inc();
  for (SourceObjects sourceObject : sourceObjects) {
    final String shardToDelete = sourceObject.getName();
    LOG.info("Cleaning up shard  " + shardToDelete + " for result " + dest);
    int retryCount = MAX_RETRY_COUNT;
    boolean done = false;
    while (!done && retryCount > 0) {
      try {
        storage.delete(destPath.getBucket(), shardToDelete).execute();
        done = true;
      } catch (Exception ex) {
        LOG.info("Error deleting " + ex.getMessage() + retryCount + " retries left");
      }
      retryCount--;
    }
    Metrics.counter(CombineShardsFn.class, "Files deleted").inc();
  }

  return combineResult;
}
 
Example 20
Source File: GcsUtilTest.java    From beam with Apache License 2.0
@Test
public void testRecursiveGlobExpansion() throws IOException {
  GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
  GcsUtil gcsUtil = pipelineOptions.getGcsUtil();

  Storage mockStorage = Mockito.mock(Storage.class);
  gcsUtil.setStorageClient(mockStorage);

  Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
  Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);
  Storage.Objects.List mockStorageList = Mockito.mock(Storage.Objects.List.class);

  Objects modelObjects = new Objects();
  List<StorageObject> items = new ArrayList<>();
  // A directory
  items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));

  // Files within the directory
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file1.txt"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file2.txt"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file3.txt"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/directory/anotherfile"));
  items.add(new StorageObject().setBucket("testbucket").setName("test/file4.txt"));

  modelObjects.setItems(items);

  when(mockStorage.objects()).thenReturn(mockStorageObjects);
  when(mockStorageObjects.get("testbucket", "test/directory/otherfile"))
      .thenReturn(mockStorageGet);
  when(mockStorageObjects.list("testbucket")).thenReturn(mockStorageList);
  when(mockStorageGet.execute())
      .thenReturn(
          new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
  when(mockStorageList.execute()).thenReturn(modelObjects);

  {
    GcsPath pattern = GcsPath.fromUri("gs://testbucket/test/**/*.txt");
    List<GcsPath> expectedFiles =
        ImmutableList.of(
            GcsPath.fromUri("gs://testbucket/test/directory/file1.txt"),
            GcsPath.fromUri("gs://testbucket/test/directory/file2.txt"),
            GcsPath.fromUri("gs://testbucket/test/directory/file3.txt"),
            GcsPath.fromUri("gs://testbucket/test/file4.txt"));

    assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray()));
  }
}