Java Code Examples for com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem

The following examples show how to use com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop-connectors   Source File: CoopLockFsck.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the fsck command line and delegates the work to {@link CoopLockFsckRunner}.
 *
 * <p>Expected arguments are {@code <command> <bucket> [<operationId>]}: the {@code
 * COMMAND_CHECK} command takes exactly two arguments, every other supported command takes three.
 *
 * @param args command line arguments; the first one is the command name
 * @return the result of {@link CoopLockFsckRunner#run()}
 * @throws IllegalArgumentException if no arguments are given, the command is unknown, the number
 *     of arguments does not match the command, or the bucket does not use the GCS scheme
 */
@Override
public int run(String[] args) throws Exception {
  // Report a usable error instead of an ArrayIndexOutOfBoundsException on an empty command line.
  checkArgument(
      args.length > 0, "No arguments were specified, command should be one of %s", FSCK_COMMANDS);

  String command = args[0];
  // The template has two placeholders, so the list of supported commands must be passed too.
  checkArgument(
      FSCK_COMMANDS.contains(command),
      "Unknown %s command, should be %s",
      command,
      FSCK_COMMANDS);

  int expectedArgsNumber = COMMAND_CHECK.equals(command) ? 2 : 3;
  checkArgument(
      args.length == expectedArgsNumber,
      "%s arguments should be specified for %s command, but were: %s",
      expectedArgsNumber,
      command,
      Arrays.asList(args));

  String bucket = args[1];
  checkArgument(
      bucket.startsWith(GoogleCloudStorageFileSystem.SCHEME + "://"),
      "bucket parameter should have 'gs://' scheme");

  // The check command has no operation ID argument.
  String operationId = COMMAND_CHECK.equals(command) ? null : args[2];

  return new CoopLockFsckRunner(getConf(), URI.create(bucket), command, operationId).run();
}
 
Example 2
/**
 * Checks that metadata written directly through the GCS client stays in the object's attributes
 * while only the attribute set via {@code setXAttr} is reported by {@code getXAttrs}.
 */
@Test
public void getXAttr_nonGhfsMetadata() throws Exception {
  GoogleCloudStorageFileSystem underlyingFs = ((GoogleHadoopFileSystem) ghfs).getGcsFs();
  URI tempFileUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path testFile = ghfsHelper.castAsHadoopPath(tempFileUri);

  ghfsHelper.writeFile(testFile, "obj-test-get-xattr-extra", 1, /* overwrite= */ false);

  // Attach a metadata entry straight through the GCS client, bypassing the Hadoop xattr API.
  StorageResourceId testFileId = StorageResourceId.fromStringPath(testFile.toString());
  ImmutableMap<String, byte[]> rawMetadata =
      ImmutableMap.of("non-ghfs-xattr-key", "non-ghfs-xattr-value".getBytes(UTF_8));
  UpdatableItemInfo rawUpdate = new UpdatableItemInfo(testFileId, rawMetadata);
  underlyingFs.getGcs().updateItems(ImmutableList.of(rawUpdate));

  // Attach a second entry through the Hadoop xattr API.
  ghfs.setXAttr(testFile, "test-xattr-some", "test-xattr-value".getBytes(UTF_8));

  // The object itself carries both entries...
  assertThat(toStringValuesMap(underlyingFs.getFileInfo(testFile.toUri()).getAttributes()))
      .containsExactly(
          "non-ghfs-xattr-key", "non-ghfs-xattr-value",
          "GHFS_XATTR_test-xattr-some", "test-xattr-value");
  // ...but only the one written via setXAttr is visible as an xattr.
  assertThat(toStringValuesMap(ghfs.getXAttrs(testFile)))
      .containsExactly("test-xattr-some", "test-xattr-value");

  // Remove the temporary file.
  assertThat(ghfs.delete(testFile, true)).isTrue();
}
 
Example 3
/**
 * One-time fixture: builds the GCS options, the retrying HTTP request initializer and the
 * integration helper shared by the tests, then runs the helper's own before-all hook.
 */
@BeforeClass
public static void before() throws Throwable {
  String projectId = TestConfiguration.getInstance().getProjectId();
  checkNotNull(projectId, "projectId can not be null");
  String appName = GoogleCloudStorageIntegrationHelper.APP_NAME;
  Credential credential = GoogleCloudStorageTestHelper.getCredential();
  checkNotNull(credential, "credential must not be null");

  gcsOptions =
      GoogleCloudStorageOptions.builder().setAppName(appName).setProjectId(projectId).build();
  httpRequestInitializer =
      new RetryHttpInitializer(credential, gcsOptions.toRetryHttpInitializerOptions());

  // Bucket deletion is enabled on the file system handed to the helper.
  GoogleCloudStorageFileSystemOptions fsOptions =
      GoogleCloudStorageFileSystemOptions.builder()
          .setBucketDeleteEnabled(true)
          .setCloudStorageOptions(gcsOptions)
          .build();
  GoogleCloudStorageFileSystem fileSystem = new GoogleCloudStorageFileSystem(credential, fsOptions);

  gcsfsIHelper = new GoogleCloudStorageFileSystemIntegrationHelper(fileSystem);
  gcsfsIHelper.beforeAllTests();
}
 
Example 4
Source Project: presto   Source File: GcsConfigurationProvider.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Copies the caller's GCS OAuth access token, when present in the identity's extra credentials,
 * into the Hadoop configuration and updates the cache key accordingly.
 *
 * <p>Does nothing for URIs whose scheme is not the GCS scheme, including URIs with no scheme.
 *
 * @param configuration Hadoop configuration to update
 * @param context context providing the identity whose extra credentials are read
 * @param uri location being accessed
 */
@Override
public void updateConfiguration(Configuration configuration, HdfsContext context, URI uri)
{
    // Constant-first comparison: URI.getScheme() returns null for a scheme-less URI, which would
    // otherwise cause a NullPointerException here.
    if (!GoogleCloudStorageFileSystem.SCHEME.equals(uri.getScheme())) {
        return;
    }

    String accessToken = context.getIdentity().getExtraCredentials().get(GCS_OAUTH_KEY);
    if (accessToken != null) {
        configuration.set(GCS_ACCESS_TOKEN_CONF, accessToken);
        setCacheKey(configuration, accessToken);
    }
}
 
Example 5
/**
 * Creates a {@link GoogleHadoopFileSystemBase} backed by an already constructed
 * GoogleCloudStorageFileSystem; a later initialize() call will not re-initialize it.
 *
 * @param gcsFs the file system to use; must not be null
 */
// TODO(b/120887495): This @VisibleForTesting annotation was being ignored by prod code.
// Please check that removing it is correct, and remove this comment along with it.
// @VisibleForTesting
GoogleHadoopFileSystemBase(GoogleCloudStorageFileSystem gcsFs) {
  setGcsFs(checkNotNull(gcsFs, "gcsFs must not be null"));
}
 
Example 6
/**
 * Decides whether {@code fixedPath} is suitable for flat globbing, where a single large listing
 * is fetched during globStatus and the core globbing logic then runs in-memory.
 *
 * @param fixedPath the glob path to inspect
 * @return {@code true} only if the file system scheme is the GCS scheme, the path contains a
 *     wildcard, and its authority is non-empty and wildcard-free
 */
@VisibleForTesting
boolean couldUseFlatGlob(Path fixedPath) {
  // The base Hadoop Path scheme has to match the underlying URI scheme for GCS.
  String fsScheme = getUri().getScheme();
  if (!fsScheme.equals(GoogleCloudStorageFileSystem.SCHEME)) {
    logger.atFinest().log(
        "Flat glob is on, but doesn't work for scheme '%s', using default behavior.", fsScheme);
    return false;
  }

  // Without a wildcard somewhere in the full pattern a flat glob gains nothing.
  if (!new GlobPattern(fixedPath.toString()).hasWildcard()) {
    logger.atFinest().log(
        "Flat glob is on, but Path '%s' has no wildcard, using default behavior.", fixedPath);
    return false;
  }

  // An authority must be present...
  String authority = fixedPath.toUri().getAuthority();
  if (isNullOrEmpty(authority)) {
    logger.atFinest().log(
        "Flat glob is on, but Path '%s' has a empty authority, using default behavior.",
        fixedPath);
    return false;
  }

  // ...and it must be a literal, not a pattern.
  if (new GlobPattern(authority).hasWildcard()) {
    logger.atFinest().log(
        "Flat glob is on, but Path '%s' has a wildcard authority, using default behavior.",
        fixedPath);
    return false;
  }

  return true;
}
 
Example 7
/**
 * Builds a {@link GoogleCloudStorageFileSystem} from the options and credential derived from the
 * given Hadoop configuration.
 *
 * @param config Hadoop configuration to read the options and credential from
 * @return a newly constructed file system
 * @throws IOException declared by the signature; propagated from the calls made here
 * @throws RuntimeException wrapping a {@link GeneralSecurityException} raised while obtaining
 *     the credential
 */
private GoogleCloudStorageFileSystem createGcsFs(Configuration config) throws IOException {
  final GoogleCloudStorageFileSystemOptions options =
      GoogleHadoopFileSystemConfiguration.getGcsFsOptionsBuilder(config).build();

  final Credential credential;
  try {
    credential = getCredential(config, options);
  } catch (GeneralSecurityException securityFailure) {
    // Security failures are surfaced as unchecked exceptions with the cause preserved.
    throw new RuntimeException(securityFailure);
  }

  return new GoogleCloudStorageFileSystem(credential, options);
}
 
Example 8
Source Project: hadoop-connectors   Source File: GoogleHadoopFileSystem.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Takes the root bucket from the authority of {@code initUri} and validates it.
 *
 * @param gcsFs not used by this implementation
 * @throws IllegalArgumentException if {@code initUri} has no authority
 */
@Override
@VisibleForTesting
protected void configureBuckets(GoogleCloudStorageFileSystem gcsFs) throws IOException {
  rootBucket = initUri.getAuthority();
  boolean bucketSpecified = rootBucket != null;
  checkArgument(bucketSpecified, "No bucket specified in GCS URI: %s", initUri);
  // Building a bucket-only path validates the root bucket name; the result itself is unused.
  UriPaths.fromStringPathComponents(
      rootBucket, /* objectName= */ null, /* allowEmptyObjectName= */ true);
  logger.atFine().log("configureBuckets: GoogleHadoopFileSystem root bucket is '%s'", rootBucket);
}
 
Example 9
/**
 * Creates a nested leaf directory and checks that its ancestors are reported as existing exactly
 * when implicit directory inference is enabled.
 */
@Test
public void testImplicitDirectory() throws IOException {
  String bucketToClear = sharedBucketName1;
  GoogleHadoopFileSystemBase hadoopFs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = hadoopFs.getGcsFs();
  URI tempUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path parentPath = ghfsHelper.castAsHadoopPath(tempUri);
  URI parentUri = hadoopFs.getGcsPath(parentPath);

  // gs://<bucket>/<generated-tempdir>/foo-subdir: neither it nor its parent exists yet.
  Path middlePath = new Path(parentPath, "foo-subdir");
  URI middleUri = hadoopFs.getGcsPath(middlePath);

  // Only the innermost directory is created explicitly.
  URI leafUri = hadoopFs.getGcsPath(new Path(middlePath, "bar-subdir"));
  gcsfs.mkdir(leafUri);

  boolean ancestorsExpected =
      gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, leafUri, /* exists= */ true);
  assertDirectory(gcsfs, middleUri, /* exists= */ ancestorsExpected);
  assertDirectory(gcsfs, parentUri, /* exists= */ ancestorsExpected);

  ghfsHelper.clearBucket(bucketToClear);
}
 
Example 10
/**
 * Deletes the only object under a directory and checks that the directory is then reported as
 * existing exactly when auto-repair of implicit directories is enabled.
 */
@Test
public void testRepairDirectory_afterFileDelete() throws IOException {
  GoogleHadoopFileSystemBase hadoopFs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = hadoopFs.getGcsFs();
  GoogleCloudStorage storage = gcsfs.getGcs();
  URI tempUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(tempUri);
  URI dirUri = hadoopFs.getGcsPath(dirPath);

  // Create gs://<bucket>/<generated-tempdir>/empty-object directly through the GCS client.
  URI objectUri = hadoopFs.getGcsPath(new Path(dirPath, "empty-object"));
  StorageResourceId objectId = StorageResourceId.fromUriPath(objectUri, false);
  storage.createEmptyObject(objectId);

  boolean inferEnabled =
      gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled();
  boolean autoRepairEnabled =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  // Before the delete the directory is visible only through inference.
  assertDirectory(gcsfs, dirUri, /* exists= */ inferEnabled);

  gcsfs.delete(objectUri, false);

  // After the sole object is gone the directory is expected only if auto-repair is on.
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairEnabled);

  ghfsHelper.clearBucket(objectId.getBucketName());
}
 
Example 11
/**
 * Recursively deletes the only subdirectory of a directory and checks that the parent is then
 * reported as existing exactly when auto-repair of implicit directories is enabled.
 */
@Test
public void testRepairDirectory_afterSubdirectoryDelete() throws IOException {
  GoogleHadoopFileSystemBase hadoopFs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = hadoopFs.getGcsFs();
  GoogleCloudStorage storage = gcsfs.getGcs();

  // A directory without an explicit directory entry can be deleted without a
  // FileNotFoundException only when directory inference is enabled, so skip otherwise.
  assumeTrue(gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled());

  URI tempUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(tempUri);
  URI dirUri = hadoopFs.getGcsPath(dirPath);
  Path subdirPath = new Path(dirPath, "subdir");
  URI subdirUri = hadoopFs.getGcsPath(subdirPath);

  // Create gs://<bucket>/<generated-tempdir>/subdir/empty-object directly through the GCS client.
  URI objectUri = hadoopFs.getGcsPath(new Path(subdirPath, "empty-object"));
  StorageResourceId objectId = StorageResourceId.fromUriPath(objectUri, false);
  storage.createEmptyObject(objectId);

  boolean autoRepairEnabled =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, dirUri, /* exists= */ true);
  assertDirectory(gcsfs, subdirUri, /* exists= */ true);

  gcsfs.delete(subdirUri, true);

  // After its only child is gone the parent is expected only if auto-repair is on.
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairEnabled);

  ghfsHelper.clearBucket(objectId.getBucketName());
}
 
Example 12
/**
 * Renames the only object of a directory to a location outside of it and checks that the
 * directory is then reported as existing exactly when auto-repair of implicit directories is
 * enabled.
 */
@Test
public void testRepairDirectory_afterFileRename() throws IOException {
  GoogleHadoopFileSystemBase hadoopFs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = hadoopFs.getGcsFs();
  GoogleCloudStorage storage = gcsfs.getGcs();

  URI tempUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(tempUri);
  URI dirUri = hadoopFs.getGcsPath(dirPath);

  // Create gs://<bucket>/<generated-tempdir>/empty-object directly through the GCS client.
  URI objectUri = hadoopFs.getGcsPath(new Path(dirPath, "empty-object"));
  StorageResourceId objectId = StorageResourceId.fromUriPath(objectUri, false);
  storage.createEmptyObject(objectId);

  boolean inferEnabled =
      gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled();
  boolean autoRepairEnabled =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  // Before the rename the directory is visible only through inference.
  assertDirectory(gcsfs, dirUri, /* exists= */ inferEnabled);

  // Rename the sole object to the URI obtained by resolving ".." against it.
  URI renameTarget = objectUri.resolve("..");
  gcsfs.rename(objectUri, renameTarget);

  // With its only object renamed away the directory is expected only if auto-repair is on.
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairEnabled);

  ghfsHelper.clearBucket(objectId.getBucketName());
}
 
Example 13
/**
 * Renames the only subdirectory of a directory to a location outside of it and checks that the
 * parent is then reported as existing exactly when auto-repair of implicit directories is
 * enabled.
 */
@Test
public void testRepairDirectory_afterSubdirectoryRename() throws IOException {
  String bucketToClear = sharedBucketName1;
  GoogleHadoopFileSystemBase hadoopFs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = hadoopFs.getGcsFs();
  GoogleCloudStorage storage = gcsfs.getGcs();

  // A directory without an explicit directory entry can be handled without a
  // FileNotFoundException only when directory inference is enabled, so skip otherwise.
  assumeTrue(gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled());

  URI tempUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(tempUri);
  URI dirUri = hadoopFs.getGcsPath(dirPath);
  Path subdirPath = new Path(dirPath, "subdir");
  URI subdirUri = hadoopFs.getGcsPath(subdirPath);

  // Create gs://<bucket>/<generated-tempdir>/subdir/empty-object directly through the GCS client.
  URI objectUri = hadoopFs.getGcsPath(new Path(subdirPath, "empty-object"));
  StorageResourceId objectId = StorageResourceId.fromUriPath(objectUri, false);
  storage.createEmptyObject(objectId);

  boolean autoRepairEnabled =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, dirUri, /* exists= */ true);
  assertDirectory(gcsfs, subdirUri, /* exists= */ true);

  // Rename the subdirectory to the URI obtained by resolving "." against the temp path.
  URI renameTarget = tempUri.resolve(".");
  gcsfs.rename(subdirUri, renameTarget);

  // With its only child renamed away the parent is expected only if auto-repair is on.
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairEnabled);

  ghfsHelper.clearBucket(bucketToClear);
}
 
Example 14
/**
 * Asserts that {@code path} exists (or not) as requested and that its file info describes a
 * directory.
 *
 * @param gcsfs file system to query
 * @param path path to check
 * @param exists expected result of {@code gcsfs.exists(path)}
 */
private static void assertDirectory(GoogleCloudStorageFileSystem gcsfs, URI path, boolean exists)
    throws IOException {
  String expectation = exists ? "exist" : "not exist";
  assertWithMessage("Expected to %s: %s", expectation, path)
      .that(gcsfs.exists(path))
      .isEqualTo(exists);

  // The directory flag is asserted regardless of the expected existence.
  boolean reportedAsDirectory = gcsfs.getFileInfo(path).isDirectory();
  assertWithMessage("Expected to be a directory: %s", path).that(reportedAsDirectory).isTrue();
}
 
Example 15
/**
 * Builds a bucket-rooted {@link GoogleHadoopFileSystem} on top of an in-memory storage
 * implementation and initializes it against {@code IN_MEMORY_TEST_BUCKET}.
 *
 * @return the initialized in-memory file system
 */
public static GoogleHadoopFileSystem createInMemoryGoogleHadoopFileSystem() throws IOException {
  GoogleCloudStorageOptions storageOptions = GoogleCloudStorageOptions.DEFAULT;
  GoogleCloudStorageFileSystemOptions fsOptions =
      GoogleCloudStorageFileSystemOptions.builder().setCloudStorageOptions(storageOptions).build();

  // The same default options configure both the in-memory store and the file system over it.
  InMemoryGoogleCloudStorage inMemoryStorage = new InMemoryGoogleCloudStorage(storageOptions);
  GoogleCloudStorageFileSystem inMemoryFs =
      new GoogleCloudStorageFileSystem(inMemoryStorage, fsOptions);

  GoogleHadoopFileSystem hadoopFs = new GoogleHadoopFileSystem(inMemoryFs);
  initializeInMemoryFileSystem(hadoopFs, IN_MEMORY_TEST_BUCKET);
  return hadoopFs;
}
 
Example 16
/** Exercises the success path of configureBuckets() for a bare bucket URI and a bucket path. */
@Test
@Override
public void testConfigureBucketsSuccess() throws IOException {
  // configureBuckets runs after GCSFS initialization in configure(). To stay reusable by derived
  // unit tests (loadConfig can't be called from an inherited test case), use the constructor
  // that accepts a ready-made (fake) GCSFS and skip the GCSFS-specific parts of the config.
  GoogleCloudStorageFileSystem inMemoryGcsFs =
      new GoogleCloudStorageFileSystem(new InMemoryGoogleCloudStorage());

  // Case 1: URI that consists of a unique bucket only.
  String bucketOnlyLocation = "gs://" + ghfsHelper.getUniqueBucketName("configure-root");
  URI bucketOnlyUri = new Path(bucketOnlyLocation).toUri();

  GoogleHadoopFileSystem bucketOnlyFs = new GoogleHadoopFileSystem(inMemoryGcsFs);
  bucketOnlyFs.initUri = bucketOnlyUri;
  bucketOnlyFs.configureBuckets(inMemoryGcsFs);

  // The init URI must be left untouched.
  assertThat(bucketOnlyFs.initUri).isEqualTo(bucketOnlyUri);

  // Case 2: URI that points at an object path inside the shared bucket.
  URI bucketPathUri = new Path("gs://" + ghfsHelper.sharedBucketName1 + "/foo").toUri();
  GoogleHadoopFileSystem bucketPathFs = new GoogleHadoopFileSystem(inMemoryGcsFs);
  bucketPathFs.initUri = bucketPathUri;
  bucketPathFs.configureBuckets(inMemoryGcsFs);

  // The init URI must be left untouched and the root bucket must equal its authority.
  assertThat(bucketPathFs.initUri).isEqualTo(bucketPathUri);

  assertThat(bucketPathFs.getRootBucketName()).isEqualTo(bucketPathUri.getAuthority());
}
 
Example 17
/** Checks that configureBuckets() succeeds when only a root bucket is given in the URI. */
@Test
@Override
public void testConfigureBucketsWithRootBucketButNoSystemBucket() throws IOException {
  String uniqueBucket = ghfsHelper.getUniqueBucketName("configure-root");
  URI bucketUri = new Path("gs://" + uniqueBucket).toUri();

  InMemoryGoogleCloudStorage inMemoryStorage = new InMemoryGoogleCloudStorage();
  GoogleCloudStorageFileSystem inMemoryGcsFs = new GoogleCloudStorageFileSystem(inMemoryStorage);
  GoogleHadoopFileSystem hadoopFs = new GoogleHadoopFileSystem(inMemoryGcsFs);
  hadoopFs.initUri = bucketUri;
  hadoopFs.configureBuckets(inMemoryGcsFs);

  // The init URI must be left untouched.
  assertThat(hadoopFs.initUri).isEqualTo(bucketUri);
}
 
Example 18
/** Checks that configureBuckets() rejects an init URI that names no bucket. */
@Test
@Override
public void testConfigureBucketsWithNeitherRootBucketNorSystemBucket() throws IOException {
  URI bucketlessUri = new Path("gs://").toUri();
  InMemoryGoogleCloudStorage inMemoryStorage = new InMemoryGoogleCloudStorage();
  GoogleCloudStorageFileSystem inMemoryGcsFs = new GoogleCloudStorageFileSystem(inMemoryStorage);
  GoogleHadoopFileSystem hadoopFs = new GoogleHadoopFileSystem(inMemoryGcsFs);
  hadoopFs.initUri = bucketlessUri;

  IllegalArgumentException failure =
      assertThrows(
          IllegalArgumentException.class, () -> hadoopFs.configureBuckets(inMemoryGcsFs));

  assertThat(failure).hasMessageThat().isEqualTo("No bucket specified in GCS URI: gs:/");
}
 
Example 19
/**
 * Supplies the GcsFs over which the in-memory GHFS runs: the {@code inMemoryGcsFs} instance when
 * one is set, otherwise a new file system backed by a fresh in-memory store.
 *
 * @return the GcsFs to use
 */
private static synchronized GoogleCloudStorageFileSystem createUnderlyingStorage() {
  return inMemoryGcsFs != null
      ? inMemoryGcsFs
      : new GoogleCloudStorageFileSystem(new InMemoryGoogleCloudStorage());
}
 
Example 20
/** Runs the helper's after-all hook and verifies that both shared buckets no longer exist. */
@AfterClass
public static void afterClass() throws Throwable {
  gcsfsIHelper.afterAllTests();
  GoogleCloudStorageFileSystem fileSystem = gcsfsIHelper.gcsfs;

  URI firstSharedBucket = new URI("gs://" + gcsfsIHelper.sharedBucketName1);
  assertThat(fileSystem.exists(firstSharedBucket)).isFalse();

  URI secondSharedBucket = new URI("gs://" + gcsfsIHelper.sharedBucketName2);
  assertThat(fileSystem.exists(secondSharedBucket)).isFalse();
}
 
Example 21
/**
 * Runs the fsck {@code --check} command against a bucket and verifies that the existing
 * directory and file are still there and that no lock directory exists afterwards.
 */
@Test
public void noOperations_checkSucceeds() throws Exception {
  String bucketName = gcsfsIHelper.createUniqueBucket("coop-no-op-check-succeeds");
  URI bucketUri = new URI("gs://" + bucketName + "/");
  String fileName = "file";
  URI dirUri = bucketUri.resolve("delete_" + UUID.randomUUID() + "/");
  URI fileUri = dirUri.resolve(fileName);

  // Seed the bucket with a single file inside the directory.
  gcsfsIHelper.writeTextFile(bucketName, fileUri.getPath(), "file_content");

  GoogleCloudStorageFileSystem gcsFs = newGcsFs(newGcsFsOptions(), httpRequestInitializer);

  assertThat(gcsFs.exists(dirUri)).isTrue();
  assertThat(gcsFs.exists(fileUri)).isTrue();

  CoopLockFsck fsck = new CoopLockFsck();
  fsck.setConf(getTestConfiguration());

  String[] checkArgs = {"--check", "gs://" + bucketName};
  fsck.run(checkArgs);

  // The check must leave the data in place...
  assertThat(gcsFs.exists(dirUri)).isTrue();
  assertThat(gcsFs.exists(fileUri)).isTrue();

  // ...and no lock directory may be present in the bucket.
  assertThat(gcsFs.exists(bucketUri.resolve(LOCK_DIRECTORY))).isFalse();
}
 
Example 22
/**
 * Attempts a rename through a file system whose HTTP requests fail when they match {@code
 * failPredicate}, and asserts that the rename fails because of the injected failure.
 *
 * @param srcDirUri rename source
 * @param dstDirUri rename destination
 * @param options options for the failing file system
 * @param failPredicate selects the requests that should fail
 */
private static void failRenameOperation(
    URI srcDirUri,
    URI dstDirUri,
    GoogleCloudStorageFileSystemOptions options,
    Predicate<HttpRequest> failPredicate)
    throws IOException {
  GoogleCloudStorageFileSystem failingGcsFs =
      newGcsFs(options, newFailingRequestInitializer(failPredicate));

  Exception renameFailure =
      assertThrows(Exception.class, () -> failingGcsFs.rename(srcDirUri, dstDirUri));
  // The injected failure is expected two causes deep.
  assertThat(renameFailure)
      .hasCauseThat()
      .hasCauseThat()
      .hasMessageThat()
      .endsWith("Injected failure");
}
 
Example 23
/**
 * Attempts a recursive delete through a file system whose object DELETE requests for {@code
 * bucketName} fail, and asserts that the delete fails because of the injected failure.
 *
 * @param gcsFsOptions options for the failing file system
 * @param bucketName bucket whose object deletions should fail
 * @param dirUri directory to delete
 */
private static void failDeleteOperation(
    GoogleCloudStorageFileSystemOptions gcsFsOptions, String bucketName, URI dirUri)
    throws Exception {
  // Fail only DELETE requests whose URL addresses an object in the given bucket.
  String bucketObjectsUrlPart = "/b/" + bucketName + "/o/";
  HttpRequestInitializer deleteFailingInitializer =
      newFailingRequestInitializer(
          request ->
              "DELETE".equals(request.getRequestMethod())
                  && request.getUrl().toString().contains(bucketObjectsUrlPart));
  GoogleCloudStorageFileSystem failingGcsFs = newGcsFs(gcsFsOptions, deleteFailingInitializer);

  IOException deleteFailure =
      assertThrows(IOException.class, () -> failingGcsFs.delete(dirUri, /* recursive= */ true));
  // The injected failure is expected two causes deep.
  assertThat(deleteFailure)
      .hasCauseThat()
      .hasCauseThat()
      .hasMessageThat()
      .endsWith("Injected failure");
}
 
Example 24
/**
 * Creates a file system over a {@link GoogleCloudStorageImpl} that uses the supplied request
 * initializer.
 *
 * @param gcsFsOptions file system options; their cloud storage options configure the client
 * @param requestInitializer initializer passed to the storage client
 * @return the new file system
 */
private static GoogleCloudStorageFileSystem newGcsFs(
    GoogleCloudStorageFileSystemOptions gcsFsOptions, HttpRequestInitializer requestInitializer)
    throws IOException {
  return new GoogleCloudStorageFileSystem(
      new GoogleCloudStorageImpl(gcsFsOptions.getCloudStorageOptions(), requestInitializer),
      gcsFsOptions);
}
 
Example 25
Source Project: pentaho-hadoop-shims   Source File: GcsConf.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps a PVFS path of the form {@code /<bucket>/<object path>} to a GCS path that uses the first
 * path segment as the bucket (URI authority) and the remaining segments as the object path.
 *
 * @param pvfsPath the path to map; validated with {@code validatePath}
 * @return the corresponding path with the GCS scheme
 * @throws IllegalArgumentException if the path does not contain a bucket segment
 * @throws IllegalStateException if the resulting URI is syntactically invalid
 */
@Override public Path mapPath( Path pvfsPath ) {
  validatePath( pvfsPath );
  String[] splitPath = pvfsPath.toUri().getPath().split( "/" );

  // Index 0 is the empty string before the leading separator, so the bucket sits at index 1 and
  // at least two elements are needed; checking only for > 0 let splitPath[1] overflow.
  Preconditions.checkArgument( splitPath.length > 1, "Path must contain a bucket: %s", pvfsPath );
  String bucket = splitPath[1];
  String path = SEPARATOR + Arrays.stream( splitPath ).skip( 2 ).collect( Collectors.joining( SEPARATOR ) );
  try {
    return new Path( new URI( GoogleCloudStorageFileSystem.SCHEME, bucket, path, null ) );
  } catch ( URISyntaxException e ) {
    throw new IllegalStateException( e );
  }
}
 
Example 26
Source Project: gatk   Source File: BucketUtils.java    License: BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Tells whether the given URI scheme is one of the schemes eligible for prefetching.
 *
 * @param scheme the scheme to test; may be null
 * @return {@code true} if the scheme is the GCS, HTTP or HTTPS provider scheme
 */
private static boolean isEligibleForPrefetching(final String scheme){
    if (scheme == null) {
        return false;
    }
    return scheme.equals(GoogleCloudStorageFileSystem.SCHEME)
            || scheme.equals(HttpFileSystemProvider.SCHEME)
            || scheme.equals(HttpsFileSystemProvider.SCHEME);
}
 
Example 27
/**
 * Closes the current delegate channel and folds what it wrote into the final destination object.
 *
 * <p>When the current component was written to a path other than {@code finalGcsPath}, it is
 * composed onto the destination object and its deletion is submitted to {@code
 * cleanupThreadpool}. Otherwise only {@code curDestGenerationId} is updated.
 *
 * @throws IOException declared by the signature; propagated from the calls made here
 * @throws IllegalStateException if the destination and the temporary object are in different
 *     buckets
 */
private void commitCurrentFile() throws IOException {
  // TODO(user): Optimize the case where 0 bytes have been written in the current component
  // to return early.
  // Grab the inner channel before closing the delegate; it is queried for item info below.
  WritableByteChannel innerChannel = curDelegate.getInternalChannel();
  curDelegate.close();

  // Generation of the object that was just closed; stays UNKNOWN_GENERATION_ID when the inner
  // channel cannot provide item info.
  long generationId = StorageResourceId.UNKNOWN_GENERATION_ID;
  if (innerChannel instanceof GoogleCloudStorageItemInfo.Provider) {
    generationId = ((GoogleCloudStorageItemInfo.Provider) innerChannel)
        .getItemInfo().getContentGeneration();
    logger.atFine().log(
        "innerChannel is GoogleCloudStorageItemInfo.Provider; closed generationId %s.",
        generationId);
  } else {
    logger.atFine().log("innerChannel NOT instanceof provider: %s", innerChannel.getClass());
  }

  // On the first component, curGcsPath will equal finalGcsPath, and no compose() call is
  // necessary. Otherwise, we compose in-place into the destination object and then delete
  // the temporary object.
  if (!finalGcsPath.equals(curGcsPath)) {
    // Both resource IDs carry a generation: the destination's last known one and the one of the
    // component that was just closed.
    StorageResourceId destResourceId =
        StorageResourceId.fromStringPath(finalGcsPath.toString(), curDestGenerationId);
    final StorageResourceId tempResourceId =
        StorageResourceId.fromStringPath(curGcsPath.toString(), generationId);
    if (!destResourceId.getBucketName().equals(tempResourceId.getBucketName())) {
      throw new IllegalStateException(String.format(
          "Destination bucket in path '%s' doesn't match temp file bucket in path '%s'",
          finalGcsPath, curGcsPath));
    }
    // Sources are the destination followed by the temporary object; the result is written to
    // the destination itself.
    GoogleCloudStorageItemInfo composedObject = ghfs.getGcsFs().getGcs().composeObjects(
        ImmutableList.of(destResourceId, tempResourceId),
        destResourceId,
        GoogleCloudStorageFileSystem.objectOptionsFromFileOptions(fileOptions));
    // Remember the generation of the composed object for the next compose call.
    curDestGenerationId = composedObject.getContentGeneration();
    // The temporary object is deleted on the cleanup thread pool; the future is kept in
    // deletionFutures.
    deletionFutures.add(
        cleanupThreadpool.submit(
            () -> {
              ghfs.getGcsFs().getGcs().deleteObjects(ImmutableList.of(tempResourceId));
              return null;
            }));
  } else {
    // First commit was direct to the destination; the generationId of the object we just
    // committed will be used as the destination generation id for future compose calls.
    curDestGenerationId = generationId;
  }
}
 
Example 28
/**
 * Installs an already constructed GCS file system and marks it as initialized.
 *
 * @param gcsFs the file system instance that {@code gcsFsSupplier} will return
 */
private void setGcsFs(GoogleCloudStorageFileSystem gcsFs) {
  gcsFsSupplier = Suppliers.ofInstance(gcsFs);
  gcsFsInitialized = true;
}
 
Example 29
/**
 * Returns the GCS file system obtained from {@code gcsFsSupplier}.
 *
 * @return the GCS FS instance
 */
public GoogleCloudStorageFileSystem getGcsFs() {
  final GoogleCloudStorageFileSystem suppliedGcsFs = gcsFsSupplier.get();
  return suppliedGcsFs;
}
 
Example 30
/**
 * Applies the supplied Hadoop configuration to this GHFS instance and sets up the underlying
 * GCS file system, either right away or through a memoized supplier when lazy initialization is
 * enabled.
 *
 * @param config Hadoop configuration object.
 */
private synchronized void configure(Configuration config) throws IOException {
  logger.atFine().log("GHFS_ID=%s: configure(config: %s)", GHFS_ID, config);

  overrideConfigFromFile(config);
  // From here on this configuration is the default one for the instance.
  setConf(config);

  enableFlatGlob = GCS_FLAT_GLOB_ENABLE.get(config, config::getBoolean);
  enableConcurrentGlob = GCS_CONCURRENT_GLOB_ENABLE.get(config, config::getBoolean);
  checksumType = GCS_FILE_CHECKSUM_TYPE.get(config, config::getEnum);
  defaultBlockSize = BLOCK_SIZE.get(config, config::getLong);
  reportedPermissions = new FsPermission(PERMISSIONS_TO_REPORT.get(config, config::get));

  boolean gcsFsMissing = gcsFsSupplier == null;

  // Lazy path: creation and the follow-up configuration run inside the memoized supplier.
  if (gcsFsMissing && GCS_LAZY_INITIALIZATION_ENABLE.get(config, config::getBoolean)) {
    gcsFsSupplier =
        Suppliers.memoize(
            () -> {
              try {
                GoogleCloudStorageFileSystem lazyGcsFs = createGcsFs(config);

                configureBuckets(lazyGcsFs);
                configureWorkingDirectory(config);
                gcsFsInitialized = true;

                return lazyGcsFs;
              } catch (IOException e) {
                throw new RuntimeException("Failed to create GCS FS", e);
              }
            });
    return;
  }

  // Eager path: create the file system now unless one was already provided.
  if (gcsFsMissing) {
    setGcsFs(createGcsFs(config));
  }
  configureBuckets(getGcsFs());
  configureWorkingDirectory(config);
}