com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem Java Examples

The following examples show how to use com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem. They are taken from open source projects; the source file and originating project are noted above each example.
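
Before the individual examples, here is a minimal sketch of constructing a GoogleCloudStorageFileSystem, assembled only from the constructors and option builders that appear in the examples below; the application name, project id, and the `credential` variable are placeholders that would come from your own configuration (see Examples #2 and #22).

// Minimal sketch assembled from the constructors used in the examples below;
// "my-app" and "my-project" are placeholder values, not from the original sources.
GoogleCloudStorageOptions gcsOptions =
    GoogleCloudStorageOptions.builder().setAppName("my-app").setProjectId("my-project").build();
GoogleCloudStorageFileSystemOptions fsOptions =
    GoogleCloudStorageFileSystemOptions.builder().setCloudStorageOptions(gcsOptions).build();

// In-memory variant, as used by the unit tests below:
GoogleCloudStorageFileSystem inMemoryGcsFs =
    new GoogleCloudStorageFileSystem(new InMemoryGoogleCloudStorage());

// Credential-backed variant, as used by the integration tests below
// (credential obtained as in Example #2 or Example #22):
GoogleCloudStorageFileSystem gcsFs = new GoogleCloudStorageFileSystem(credential, fsOptions);
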
Example #1
Source File: CoopLockFsck.java    From hadoop-connectors with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  String command = args[0];
  checkArgument(
      FSCK_COMMANDS.contains(command), "Unknown %s command, should be %s", command, FSCK_COMMANDS);

  int expectedArgsNumber = COMMAND_CHECK.equals(command) ? 2 : 3;
  checkArgument(
      args.length == expectedArgsNumber,
      "%s arguments should be specified for %s command, but were: %s",
      expectedArgsNumber,
      command,
      Arrays.asList(args));

  String bucket = args[1];
  checkArgument(
      bucket.startsWith(GoogleCloudStorageFileSystem.SCHEME + "://"),
      "bucket parameter should have 'gs://' scheme");

  String operationId = COMMAND_CHECK.equals(command) ? null : args[2];

  return new CoopLockFsckRunner(getConf(), URI.create(bucket), command, operationId).run();
}
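
The integration tests later on this page (Examples #14, #26, and #30) show the two argument shapes this check accepts; a compressed sketch, with a placeholder bucket name:

// Sketch only; "my-bucket" is a placeholder and fsck is a configured CoopLockFsck as in Example #14.
fsck.run(new String[] {"--check", "gs://my-bucket"});                // 2 arguments for --check
fsck.run(new String[] {"--rollForward", "gs://my-bucket", "all"});   // 3 arguments for repair commands
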
 
Example #2
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
@BeforeClass
public static void before() throws Throwable {
  String projectId =
      checkNotNull(TestConfiguration.getInstance().getProjectId(), "projectId can not be null");
  String appName = GoogleCloudStorageIntegrationHelper.APP_NAME;
  Credential credential =
      checkNotNull(GoogleCloudStorageTestHelper.getCredential(), "credential must not be null");

  gcsOptions =
      GoogleCloudStorageOptions.builder().setAppName(appName).setProjectId(projectId).build();
  httpRequestInitializer =
      new RetryHttpInitializer(credential, gcsOptions.toRetryHttpInitializerOptions());

  GoogleCloudStorageFileSystem gcsfs =
      new GoogleCloudStorageFileSystem(
          credential,
          GoogleCloudStorageFileSystemOptions.builder()
              .setBucketDeleteEnabled(true)
              .setCloudStorageOptions(gcsOptions)
              .build());

  gcsfsIHelper = new GoogleCloudStorageFileSystemIntegrationHelper(gcsfs);
  gcsfsIHelper.beforeAllTests();
}
 
Example #3
Source File: GoogleHadoopFileSystemXAttrsIntegrationTest.java    From hadoop-connectors with Apache License 2.0
@Test
public void getXAttr_nonGhfsMetadata() throws Exception {
  GoogleCloudStorageFileSystem gcsFs = ((GoogleHadoopFileSystem) ghfs).getGcsFs();
  URI fileUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path filePath = ghfsHelper.castAsHadoopPath(fileUri);

  ghfsHelper.writeFile(filePath, "obj-test-get-xattr-extra", 1, /* overwrite= */ false);

  UpdatableItemInfo updateInfo =
      new UpdatableItemInfo(
          StorageResourceId.fromStringPath(filePath.toString()),
          ImmutableMap.of("non-ghfs-xattr-key", "non-ghfs-xattr-value".getBytes(UTF_8)));
  gcsFs.getGcs().updateItems(ImmutableList.of(updateInfo));

  ghfs.setXAttr(filePath, "test-xattr-some", "test-xattr-value".getBytes(UTF_8));

  assertThat(toStringValuesMap(gcsFs.getFileInfo(filePath.toUri()).getAttributes()))
      .containsExactly(
          "non-ghfs-xattr-key", "non-ghfs-xattr-value",
          "GHFS_XATTR_test-xattr-some", "test-xattr-value");
  assertThat(toStringValuesMap(ghfs.getXAttrs(filePath)))
      .containsExactly("test-xattr-some", "test-xattr-value");

  // Cleanup.
  assertThat(ghfs.delete(filePath, true)).isTrue();
}
 
Example #4
Source File: GoogleHadoopFileSystemTestBase.java    From hadoop-connectors with Apache License 2.0
@Test
public void testRepairDirectory_afterFileRename() throws IOException {
  GoogleHadoopFileSystemBase myghfs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = myghfs.getGcsFs();
  GoogleCloudStorage gcs = gcsfs.getGcs();

  URI seedUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(seedUri);
  URI dirUri = myghfs.getGcsPath(dirPath);

  // An object path that looks like gs://<bucket>/<generated-tempdir>/empty-object where
  // gs://<bucket>/<generated-tempdir> does not exist yet.
  Path emptyObject = new Path(dirPath, "empty-object");
  URI objUri = myghfs.getGcsPath(emptyObject);
  StorageResourceId resource = StorageResourceId.fromUriPath(objUri, false);
  gcs.createEmptyObject(resource);

  boolean inferImplicitDirectories =
      gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled();
  boolean autoRepairImplicitDirectories =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, dirUri, /* exists= */ inferImplicitDirectories);

  gcsfs.rename(objUri, objUri.resolve(".."));

  // Implicit directory created after the sole object is renamed out of the directory
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairImplicitDirectories);

  ghfsHelper.clearBucket(resource.getBucketName());
}
 
Example #5
Source File: GoogleHadoopFileSystemIntegrationTest.java    From hadoop-connectors with Apache License 2.0
/** Validates success path in configureBuckets(). */
@Test
@Override
public void testConfigureBucketsSuccess() throws IOException {
  String rootBucketName = "gs://" + ghfsHelper.getUniqueBucketName("configure-root");

  URI initUri = new Path(rootBucketName).toUri();

  // To test configureBuckets which occurs after GCSFS initialization in configure(), while
  // still being reusable by derived unittests (we can't call loadConfig in a test case which
  // is inherited by a derived test), we will use the constructor which already provides a (fake)
  // GCSFS and skip the portions of the config specific to GCSFS.

  GoogleCloudStorageFileSystem fakeGcsFs =
      new GoogleCloudStorageFileSystem(new InMemoryGoogleCloudStorage());

  GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem(fakeGcsFs);
  fs.initUri = initUri;
  fs.configureBuckets(fakeGcsFs);

  // Verify that config settings were set correctly.
  assertThat(fs.initUri).isEqualTo(initUri);

  initUri = new Path("gs://" + ghfsHelper.sharedBucketName1 + "/foo").toUri();
  fs = new GoogleHadoopFileSystem(fakeGcsFs);
  fs.initUri = initUri;
  fs.configureBuckets(fakeGcsFs);

  // Verify that config settings were set correctly.
  assertThat(fs.initUri).isEqualTo(initUri);

  assertThat(fs.getRootBucketName()).isEqualTo(initUri.getAuthority());
}
 
Example #6
Source File: GoogleHadoopFileSystemTestBase.java    From hadoop-connectors with Apache License 2.0
@Test
public void testRepairDirectory_afterSubdirectoryRename() throws IOException {
  String bucketName = sharedBucketName1;
  GoogleHadoopFileSystemBase myghfs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = myghfs.getGcsFs();
  GoogleCloudStorage gcs = gcsfs.getGcs();

  // A directory that has no explicit directory entry can be deleted or renamed without a
  // FileNotFoundException only if implicit directory inference is enabled.
  assumeTrue(gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled());

  URI seedUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(seedUri);
  URI dirUri = myghfs.getGcsPath(dirPath);
  Path subDir = new Path(dirPath, "subdir");
  URI subdirUri = myghfs.getGcsPath(subDir);

  // An object path that looks like gs://<bucket>/<generated-tempdir>/subdir/empty-object where
  // neither the subdir nor gs://<bucket>/<generated-tempdir> exist yet.
  Path emptyObject = new Path(subDir, "empty-object");
  URI objUri = myghfs.getGcsPath(emptyObject);
  StorageResourceId resource = StorageResourceId.fromUriPath(objUri, false);
  gcs.createEmptyObject(resource);

  boolean autoRepairImplicitDirectories =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, dirUri, /* exists= */ true);
  assertDirectory(gcsfs, subdirUri, /* exists= */ true);

  gcsfs.rename(subdirUri, seedUri.resolve("."));

  // Implicit directory created after the sole subdirectory is renamed out of the directory
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairImplicitDirectories);

  ghfsHelper.clearBucket(bucketName);
}
 
Example #7
Source File: GoogleHadoopFileSystemTestBase.java    From hadoop-connectors with Apache License 2.0
private static void assertDirectory(GoogleCloudStorageFileSystem gcsfs, URI path, boolean exists)
    throws IOException {
  assertWithMessage("Expected to %s: %s", exists ? "exist" : "not exist", path)
      .that(gcsfs.exists(path))
      .isEqualTo(exists);
  assertWithMessage("Expected to be a directory: %s", path)
      .that(gcsfs.getFileInfo(path).isDirectory())
      .isTrue();
}
 
Example #8
Source File: GoogleHadoopFileSystemTestHelper.java    From hadoop-connectors with Apache License 2.0
/**
 * Creates an instance of a bucket-rooted GoogleHadoopFileSystemBase using an in-memory underlying
 * store.
 */
public static GoogleHadoopFileSystem createInMemoryGoogleHadoopFileSystem() throws IOException {
  GoogleCloudStorageOptions gcsOptions = GoogleCloudStorageOptions.DEFAULT;
  GoogleCloudStorageFileSystemOptions.Builder fsOptionsBuilder =
      GoogleCloudStorageFileSystemOptions.builder().setCloudStorageOptions(gcsOptions);
  GoogleCloudStorageFileSystem memoryGcsFs =
      new GoogleCloudStorageFileSystem(
          new InMemoryGoogleCloudStorage(gcsOptions), fsOptionsBuilder.build());
  GoogleHadoopFileSystem ghfs = new GoogleHadoopFileSystem(memoryGcsFs);
  initializeInMemoryFileSystem(ghfs, IN_MEMORY_TEST_BUCKET);
  return ghfs;
}
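
A hedged usage sketch of this helper (not from the original sources), exercising the returned in-memory filesystem through the standard Hadoop FileSystem API; the file path is hypothetical:

// Sketch: write, check, and delete a file on the in-memory GHFS instance.
GoogleHadoopFileSystem ghfs = GoogleHadoopFileSystemTestHelper.createInMemoryGoogleHadoopFileSystem();
Path file = new Path("/sketch-dir/sketch-file.txt");  // hypothetical path inside the in-memory test bucket
try (FSDataOutputStream out = ghfs.create(file)) {
  out.write("hello".getBytes(UTF_8));
}
assertThat(ghfs.exists(file)).isTrue();
assertThat(ghfs.delete(file, /* recursive= */ false)).isTrue();
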
 
Example #9
Source File: GcsConfigurationProvider.java    From presto with Apache License 2.0
@Override
public void updateConfiguration(Configuration configuration, HdfsContext context, URI uri)
{
    if (!uri.getScheme().equals(GoogleCloudStorageFileSystem.SCHEME)) {
        return;
    }

    String accessToken = context.getIdentity().getExtraCredentials().get(GCS_OAUTH_KEY);
    if (accessToken != null) {
        configuration.set(GCS_ACCESS_TOKEN_CONF, accessToken);
        setCacheKey(configuration, accessToken);
    }
}
 
Example #10
Source File: GoogleHadoopFileSystemIntegrationTest.java    From hadoop-connectors with Apache License 2.0
/** Validates success path when there is a root bucket but no system bucket is specified. */
@Test
@Override
public void testConfigureBucketsWithRootBucketButNoSystemBucket() throws IOException {
  String rootBucketName = ghfsHelper.getUniqueBucketName("configure-root");
  URI initUri = new Path("gs://" + rootBucketName).toUri();
  GoogleCloudStorageFileSystem fakeGcsFs =
      new GoogleCloudStorageFileSystem(new InMemoryGoogleCloudStorage());
  GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem(fakeGcsFs);
  fs.initUri = initUri;
  fs.configureBuckets(fakeGcsFs);

  // Verify that config settings were set correctly.
  assertThat(fs.initUri).isEqualTo(initUri);
}
 
Example #11
Source File: GoogleHadoopFileSystemIntegrationTest.java    From hadoop-connectors with Apache License 2.0
/** Validates that an exception is thrown if no root bucket is specified. */
@Test
@Override
public void testConfigureBucketsWithNeitherRootBucketNorSystemBucket() throws IOException {
  URI initUri = new Path("gs://").toUri();
  final GoogleCloudStorageFileSystem fakeGcsFs =
      new GoogleCloudStorageFileSystem(new InMemoryGoogleCloudStorage());
  final GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem(fakeGcsFs);
  fs.initUri = initUri;

  IllegalArgumentException thrown =
      assertThrows(IllegalArgumentException.class, () -> fs.configureBuckets(fakeGcsFs));

  assertThat(thrown).hasMessageThat().isEqualTo("No bucket specified in GCS URI: gs:/");
}
 
Example #12
Source File: InMemoryGoogleHadoopFileSystem.java    From hadoop-connectors with Apache License 2.0
/**
 * Returns the GcsFs to be used, creating an in-memory GCS storage over which to run the
 * in-memory GHFS if one has not already been set.
 */
private static synchronized GoogleCloudStorageFileSystem createUnderlyingStorage() {
  if (inMemoryGcsFs != null) {
    return inMemoryGcsFs;
  }
  return new GoogleCloudStorageFileSystem(new InMemoryGoogleCloudStorage());
}
 
Example #13
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
@AfterClass
public static void afterClass() throws Throwable {
  gcsfsIHelper.afterAllTests();
  GoogleCloudStorageFileSystem gcsfs = gcsfsIHelper.gcsfs;
  assertThat(gcsfs.exists(new URI("gs://" + gcsfsIHelper.sharedBucketName1))).isFalse();
  assertThat(gcsfs.exists(new URI("gs://" + gcsfsIHelper.sharedBucketName2))).isFalse();
}
 
Example #14
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
@Test
public void noOperations_checkSucceeds() throws Exception {
  String bucketName = gcsfsIHelper.createUniqueBucket("coop-no-op-check-succeeds");
  URI bucketUri = new URI("gs://" + bucketName + "/");
  String fileName = "file";
  URI dirUri = bucketUri.resolve("delete_" + UUID.randomUUID() + "/");

  // create file to delete
  gcsfsIHelper.writeTextFile(bucketName, dirUri.resolve(fileName).getPath(), "file_content");

  GoogleCloudStorageFileSystemOptions gcsFsOptions = newGcsFsOptions();

  GoogleCloudStorageFileSystem gcsFs = newGcsFs(gcsFsOptions, httpRequestInitializer);

  assertThat(gcsFs.exists(dirUri)).isTrue();
  assertThat(gcsFs.exists(dirUri.resolve(fileName))).isTrue();

  CoopLockFsck fsck = new CoopLockFsck();
  fsck.setConf(getTestConfiguration());

  fsck.run(new String[] {"--check", "gs://" + bucketName});

  assertThat(gcsFs.exists(dirUri)).isTrue();
  assertThat(gcsFs.exists(dirUri.resolve(fileName))).isTrue();

  assertThat(gcsFs.exists(bucketUri.resolve(LOCK_DIRECTORY))).isFalse();
}
 
Example #15
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
private static void failRenameOperation(
    URI srcDirUri,
    URI dstDirUri,
    GoogleCloudStorageFileSystemOptions options,
    Predicate<HttpRequest> failPredicate)
    throws IOException {
  HttpRequestInitializer failingRequestInitializer = newFailingRequestInitializer(failPredicate);
  GoogleCloudStorageFileSystem failingGcsFs = newGcsFs(options, failingRequestInitializer);

  Exception e = assertThrows(Exception.class, () -> failingGcsFs.rename(srcDirUri, dstDirUri));
  assertThat(e).hasCauseThat().hasCauseThat().hasMessageThat().endsWith("Injected failure");
}
 
Example #16
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
private static void failDeleteOperation(
    GoogleCloudStorageFileSystemOptions gcsFsOptions, String bucketName, URI dirUri)
    throws Exception {
  HttpRequestInitializer failingRequestInitializer =
      newFailingRequestInitializer(
          request ->
              "DELETE".equals(request.getRequestMethod())
                  && request.getUrl().toString().contains("/b/" + bucketName + "/o/"));
  GoogleCloudStorageFileSystem failingGcsFs = newGcsFs(gcsFsOptions, failingRequestInitializer);

  IOException e =
      assertThrows(IOException.class, () -> failingGcsFs.delete(dirUri, /* recursive= */ true));
  assertThat(e).hasCauseThat().hasCauseThat().hasMessageThat().endsWith("Injected failure");
}
 
Example #17
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
private static GoogleCloudStorageFileSystem newGcsFs(
    GoogleCloudStorageFileSystemOptions gcsFsOptions, HttpRequestInitializer requestInitializer)
    throws IOException {
  GoogleCloudStorageImpl gcs =
      new GoogleCloudStorageImpl(gcsFsOptions.getCloudStorageOptions(), requestInitializer);
  return new GoogleCloudStorageFileSystem(gcs, gcsFsOptions);
}
 
Example #18
Source File: GcsConf.java    From pentaho-hadoop-shims with Apache License 2.0
@Override public Path mapPath( Path pvfsPath ) {
  validatePath( pvfsPath );
  String[] splitPath = pvfsPath.toUri().getPath().split( "/" );

  Preconditions.checkArgument( splitPath.length > 0 );
  String bucket = splitPath[1];
  String path = SEPARATOR + Arrays.stream( splitPath ).skip( 2 ).collect( Collectors.joining( SEPARATOR ) );
  try {
    return new Path( new URI( GoogleCloudStorageFileSystem.SCHEME, bucket, path, null ) );
  } catch ( URISyntaxException e ) {
    throw new IllegalStateException( e );
  }
}
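
An illustration of the mapping this method performs; the input scheme and connection name are hypothetical, since they are not shown in this snippet:

// Hypothetical input/output pair: the first path segment becomes the GCS bucket,
// the remainder becomes the object path under the gs:// scheme.
// mapPath( new Path( "pvfs://gcsConnection/my-bucket/dir/file.txt" ) )
//     -> new Path( "gs://my-bucket/dir/file.txt" )
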
 
Example #19
Source File: GoogleHadoopFileSystemTestBase.java    From hadoop-connectors with Apache License 2.0
@Test
public void testRepairDirectory_afterFileDelete() throws IOException {
  GoogleHadoopFileSystemBase myghfs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = myghfs.getGcsFs();
  GoogleCloudStorage gcs = gcsfs.getGcs();
  URI seedUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(seedUri);
  URI dirUri = myghfs.getGcsPath(dirPath);

  // An object path that looks like gs://<bucket>/<generated-tempdir>/empty-object where
  // gs://<bucket>/<generated-tempdir> does not exist yet.
  Path emptyObject = new Path(dirPath, "empty-object");
  URI objUri = myghfs.getGcsPath(emptyObject);
  StorageResourceId resource = StorageResourceId.fromUriPath(objUri, false);
  gcs.createEmptyObject(resource);

  boolean inferImplicitDirectories =
      gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled();
  boolean autoRepairImplicitDirectories =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, dirUri, /* exists= */ inferImplicitDirectories);

  gcsfs.delete(objUri, false);

  // Implicit directory created after deletion of the sole object in the directory
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairImplicitDirectories);

  ghfsHelper.clearBucket(resource.getBucketName());
}
 
Example #20
Source File: GoogleHadoopFileSystemTestBase.java    From hadoop-connectors with Apache License 2.0
/** Test implicit directories. */
@Test
public void testImplicitDirectory() throws IOException {
  String bucketName = sharedBucketName1;
  GoogleHadoopFileSystemBase myghfs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = myghfs.getGcsFs();
  URI seedUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path parentPath = ghfsHelper.castAsHadoopPath(seedUri);
  URI parentUri = myghfs.getGcsPath(parentPath);

  // A subdir path that looks like gs://<bucket>/<generated-tempdir>/foo-subdir where
  // neither the subdir nor gs://<bucket>/<generated-tempdir> exist yet.
  Path subdirPath = new Path(parentPath, "foo-subdir");
  URI subdirUri = myghfs.getGcsPath(subdirPath);

  Path leafPath = new Path(subdirPath, "bar-subdir");
  URI leafUri = myghfs.getGcsPath(leafPath);
  gcsfs.mkdir(leafUri);

  boolean inferredDirExists =
      gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, leafUri, /* exists= */ true);
  assertDirectory(gcsfs, subdirUri, /* exists= */ inferredDirExists);
  assertDirectory(gcsfs, parentUri, /* exists= */ inferredDirExists);

  ghfsHelper.clearBucket(bucketName);
}
 
Example #21
Source File: GoogleHadoopFileSystem.java    From hadoop-connectors with Apache License 2.0
/** Sets and validates the root bucket. */
@Override
@VisibleForTesting
protected void configureBuckets(GoogleCloudStorageFileSystem gcsFs) throws IOException {
  rootBucket = initUri.getAuthority();
  checkArgument(rootBucket != null, "No bucket specified in GCS URI: %s", initUri);
  // Validate root bucket name
  UriPaths.fromStringPathComponents(
      rootBucket, /* objectName= */ null, /* allowEmptyObjectName= */ true);
  logger.atFine().log("configureBuckets: GoogleHadoopFileSystem root bucket is '%s'", rootBucket);
}
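
Examples #5, #10, and #11 above exercise this method; a compressed sketch of the behavior it enforces, reusing the fake in-memory GCSFS and filesystem instance from those tests:

// Condensed from Examples #5 and #11; fs and fakeGcsFs are constructed as in those tests.
fs.initUri = new Path("gs://my-bucket").toUri();
fs.configureBuckets(fakeGcsFs);  // succeeds: the root bucket becomes "my-bucket"

fs.initUri = new Path("gs://").toUri();
fs.configureBuckets(fakeGcsFs);  // throws IllegalArgumentException: "No bucket specified in GCS URI: gs:/"
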
 
Example #22
Source File: GoogleHadoopFileSystemBase.java    From hadoop-connectors with Apache License 2.0
private GoogleCloudStorageFileSystem createGcsFs(Configuration config) throws IOException {
  GoogleCloudStorageFileSystemOptions gcsFsOptions =
      GoogleHadoopFileSystemConfiguration.getGcsFsOptionsBuilder(config).build();

  Credential credential;
  try {
    credential = getCredential(config, gcsFsOptions);
  } catch (GeneralSecurityException e) {
    throw new RuntimeException(e);
  }

  return new GoogleCloudStorageFileSystem(credential, gcsFsOptions);
}
 
Example #23
Source File: GoogleHadoopFileSystemBase.java    From hadoop-connectors with Apache License 2.0
/**
 * Constructs an instance of {@link GoogleHadoopFileSystemBase} using the provided
 * GoogleCloudStorageFileSystem; initialize() will not re-initialize it.
 */
// TODO(b/120887495): This @VisibleForTesting annotation was being ignored by prod code.
// Please check that removing it is correct, and remove this comment along with it.
// @VisibleForTesting
GoogleHadoopFileSystemBase(GoogleCloudStorageFileSystem gcsFs) {
  checkNotNull(gcsFs, "gcsFs must not be null");
  setGcsFs(gcsFs);
}
 
Example #24
Source File: GoogleHadoopFileSystemBase.java    From hadoop-connectors with Apache License 2.0
/**
 * Determines, based on the suitability of {@code fixedPath}, whether to use flat globbing logic,
 * where a single large listing is performed during globStatus and the core globbing logic is then
 * applied in-memory.
 */
@VisibleForTesting
boolean couldUseFlatGlob(Path fixedPath) {
  // Only works for filesystems where the base Hadoop Path scheme matches the underlying URI
  // scheme for GCS.
  if (!getUri().getScheme().equals(GoogleCloudStorageFileSystem.SCHEME)) {
    logger.atFinest().log(
        "Flat glob is on, but doesn't work for scheme '%s', using default behavior.",
        getUri().getScheme());
    return false;
  }

  // The full pattern should have a wildcard, otherwise there's no point doing the flat glob.
  GlobPattern fullPattern = new GlobPattern(fixedPath.toString());
  if (!fullPattern.hasWildcard()) {
    logger.atFinest().log(
        "Flat glob is on, but Path '%s' has no wildcard, using default behavior.", fixedPath);
    return false;
  }

  // To use a flat glob, there must be an authority defined.
  if (isNullOrEmpty(fixedPath.toUri().getAuthority())) {
    logger.atFinest().log(
        "Flat glob is on, but Path '%s' has a empty authority, using default behavior.",
        fixedPath);
    return false;
  }

  // And the authority must not contain a wildcard.
  GlobPattern authorityPattern = new GlobPattern(fixedPath.toUri().getAuthority());
  if (authorityPattern.hasWildcard()) {
    logger.atFinest().log(
        "Flat glob is on, but Path '%s' has a wildcard authority, using default behavior.",
        fixedPath);
    return false;
  }

  return true;
}
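
A hedged sketch of the decision these checks make, assuming a filesystem instance `fs` whose URI scheme is gs and using placeholder paths:

// Derived from the checks above; not from the original sources.
fs.couldUseFlatGlob(new Path("gs://my-bucket/logs/2020-*/part-*"));  // true: wildcard, non-wildcard authority
fs.couldUseFlatGlob(new Path("gs://my-bucket/logs/2020/part-0"));    // false: the pattern has no wildcard
fs.couldUseFlatGlob(new Path("gs://my-*/logs/part-*"));              // false: wildcard in the authority
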
 
Example #25
Source File: GoogleHadoopFileSystemTestBase.java    From hadoop-connectors with Apache License 2.0
@Test
public void testRepairDirectory_afterSubdirectoryDelete() throws IOException {
  GoogleHadoopFileSystemBase myghfs = (GoogleHadoopFileSystemBase) ghfs;
  GoogleCloudStorageFileSystem gcsfs = myghfs.getGcsFs();
  GoogleCloudStorage gcs = gcsfs.getGcs();

  // A directory that has no explicit directory entry can be deleted or renamed without a
  // FileNotFoundException only if implicit directory inference is enabled.
  assumeTrue(gcsfs.getOptions().getCloudStorageOptions().isInferImplicitDirectoriesEnabled());

  URI seedUri = GoogleCloudStorageFileSystemIntegrationTest.getTempFilePath();
  Path dirPath = ghfsHelper.castAsHadoopPath(seedUri);
  URI dirUri = myghfs.getGcsPath(dirPath);
  Path subDir = new Path(dirPath, "subdir");
  URI subdirUri = myghfs.getGcsPath(subDir);

  // An object path that looks like gs://<bucket>/<generated-tempdir>/subdir/empty-object where
  // neither the subdir nor gs://<bucket>/<generated-tempdir> exist yet.
  Path emptyObject = new Path(subDir, "empty-object");
  URI objUri = myghfs.getGcsPath(emptyObject);
  StorageResourceId resource = StorageResourceId.fromUriPath(objUri, false);
  gcs.createEmptyObject(resource);

  boolean autoRepairImplicitDirectories =
      gcsfs.getOptions().getCloudStorageOptions().isAutoRepairImplicitDirectoriesEnabled();

  assertDirectory(gcsfs, dirUri, /* exists= */ true);
  assertDirectory(gcsfs, subdirUri, /* exists= */ true);

  gcsfs.delete(subdirUri, true);

  // Implicit directory created after deletion of the sole object in the directory
  assertDirectory(gcsfs, dirUri, /* exists= */ autoRepairImplicitDirectories);

  ghfsHelper.clearBucket(resource.getBucketName());
}
 
Example #26
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
@Test
public void failedDirectoryDelete_checkSucceeds() throws Exception {
  String bucketName = gcsfsIHelper.createUniqueBucket("coop-delete-check-failed");
  URI bucketUri = new URI("gs://" + bucketName + "/");
  String fileName = "file";
  URI dirUri = bucketUri.resolve("delete_" + UUID.randomUUID() + "/");

  // create file to delete
  gcsfsIHelper.writeTextFile(bucketName, dirUri.resolve(fileName).getPath(), "file_content");

  GoogleCloudStorageFileSystemOptions gcsFsOptions = newGcsFsOptions();

  failDeleteOperation(gcsFsOptions, bucketName, dirUri);

  GoogleCloudStorageFileSystem gcsFs = newGcsFs(gcsFsOptions, httpRequestInitializer);

  assertThat(gcsFs.exists(dirUri)).isTrue();
  assertThat(gcsFs.exists(dirUri.resolve(fileName))).isTrue();

  CoopLockFsck fsck = new CoopLockFsck();
  fsck.setConf(getTestConfiguration());

  fsck.run(new String[] {"--check", "gs://" + bucketName});

  assertThat(gcsFs.exists(dirUri)).isTrue();
  assertThat(gcsFs.exists(dirUri.resolve(fileName))).isTrue();

  // Validate lock files
  List<URI> lockFiles =
      gcsFs.listFileInfo(bucketUri.resolve(LOCK_DIRECTORY)).stream()
          .map(FileInfo::getPath)
          .collect(toList());

  assertThat(lockFiles).hasSize(3);
  assertThat(matchFile(lockFiles, "all\\.lock")).isNotNull();
  String filenamePattern = String.format(OPERATION_FILENAME_PATTERN_FORMAT, DELETE);
  URI lockFileUri = matchFile(lockFiles, filenamePattern + "\\.lock").get();
  URI logFileUri = matchFile(lockFiles, filenamePattern + "\\.log").get();
  String lockContent = gcsfsIHelper.readTextFile(bucketName, lockFileUri.getPath());
  assertThat(GSON.fromJson(lockContent, DeleteOperation.class).setLockExpiration(null))
      .isEqualTo(new DeleteOperation().setLockExpiration(null).setResource(dirUri.toString()));
  assertThat(gcsfsIHelper.readTextFile(bucketName, logFileUri.getPath()))
      .isEqualTo(dirUri.resolve(fileName) + "\n" + dirUri + "\n");
}
 
Example #27
Source File: GcsConf.java    From pentaho-hadoop-shims with Apache License 2.0
@Override public boolean supportsConnection() {
  return GoogleCloudStorageFileSystem.SCHEME.equalsIgnoreCase( details.getType() );
}
 
Example #28
Source File: BucketUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License
private static boolean isEligibleForPrefetching(final String scheme){
    return scheme != null
            && (scheme.equals(GoogleCloudStorageFileSystem.SCHEME)
            || scheme.equals(HttpFileSystemProvider.SCHEME)
            || scheme.equals(HttpsFileSystemProvider.SCHEME));
}
 
Example #29
Source File: GoogleHadoopSyncableOutputStream.java    From hadoop-connectors with Apache License 2.0
private void commitCurrentFile() throws IOException {
  // TODO(user): Optimize the case where 0 bytes have been written in the current component
  // to return early.
  WritableByteChannel innerChannel = curDelegate.getInternalChannel();
  curDelegate.close();

  long generationId = StorageResourceId.UNKNOWN_GENERATION_ID;
  if (innerChannel instanceof GoogleCloudStorageItemInfo.Provider) {
    generationId = ((GoogleCloudStorageItemInfo.Provider) innerChannel)
        .getItemInfo().getContentGeneration();
    logger.atFine().log(
        "innerChannel is GoogleCloudStorageItemInfo.Provider; closed generationId %s.",
        generationId);
  } else {
    logger.atFine().log("innerChannel NOT instanceof provider: %s", innerChannel.getClass());
  }

  // On the first component, curGcsPath will equal finalGcsPath, and no compose() call is
  // necessary. Otherwise, we compose in-place into the destination object and then delete
  // the temporary object.
  if (!finalGcsPath.equals(curGcsPath)) {
    StorageResourceId destResourceId =
        StorageResourceId.fromStringPath(finalGcsPath.toString(), curDestGenerationId);
    final StorageResourceId tempResourceId =
        StorageResourceId.fromStringPath(curGcsPath.toString(), generationId);
    if (!destResourceId.getBucketName().equals(tempResourceId.getBucketName())) {
      throw new IllegalStateException(String.format(
          "Destination bucket in path '%s' doesn't match temp file bucket in path '%s'",
          finalGcsPath, curGcsPath));
    }
    GoogleCloudStorageItemInfo composedObject = ghfs.getGcsFs().getGcs().composeObjects(
        ImmutableList.of(destResourceId, tempResourceId),
        destResourceId,
        GoogleCloudStorageFileSystem.objectOptionsFromFileOptions(fileOptions));
    curDestGenerationId = composedObject.getContentGeneration();
    deletionFutures.add(
        cleanupThreadpool.submit(
            () -> {
              ghfs.getGcsFs().getGcs().deleteObjects(ImmutableList.of(tempResourceId));
              return null;
            }));
  } else {
    // First commit was direct to the destination; the generationId of the object we just
    // committed will be used as the destination generation id for future compose calls.
    curDestGenerationId = generationId;
  }
}
 
Example #30
Source File: CoopLockRepairIntegrationTest.java    From hadoop-connectors with Apache License 2.0
private static void failedDirectoryDelete_successfullyRepaired(String command) throws Exception {
  String bucketName =
      gcsfsIHelper.createUniqueBucket(
          "coop-delete-" + Ascii.toLowerCase(command).replace("--roll", "") + "-failed");
  URI bucketUri = new URI("gs://" + bucketName + "/");
  String fileName = "file";
  URI dirUri = bucketUri.resolve("delete_" + UUID.randomUUID() + "/");

  // create file to delete
  gcsfsIHelper.writeTextFile(bucketName, dirUri.resolve(fileName).getPath(), "file_content");

  GoogleCloudStorageFileSystemOptions gcsFsOptions = newGcsFsOptions();

  failDeleteOperation(gcsFsOptions, bucketName, dirUri);

  GoogleCloudStorageFileSystem gcsFs = newGcsFs(gcsFsOptions, httpRequestInitializer);

  assertThat(gcsFs.exists(dirUri)).isTrue();
  assertThat(gcsFs.exists(dirUri.resolve(fileName))).isTrue();

  CoopLockFsck fsck = new CoopLockFsck();
  fsck.setConf(getTestConfiguration());

  // Wait until the lock expires
  sleepUninterruptibly(COOP_LOCK_TIMEOUT);

  fsck.run(new String[] {command, "gs://" + bucketName, "all"});

  assertThat(gcsFs.exists(dirUri)).isEqualTo(!"--rollForward".equals(command));
  assertThat(gcsFs.exists(dirUri.resolve(fileName))).isEqualTo(!"--rollForward".equals(command));

  // Validate lock files
  List<URI> lockFiles =
      gcsFs.listFileInfo(bucketUri.resolve(LOCK_DIRECTORY)).stream()
          .map(FileInfo::getPath)
          .collect(toList());

  assertThat(lockFiles).hasSize("--rollForward".equals(command) ? 2 : 3);
  String filenamePattern = String.format(OPERATION_FILENAME_PATTERN_FORMAT, DELETE);
  URI lockFileUri = matchFile(lockFiles, filenamePattern + "\\.lock").get();
  URI logFileUri = matchFile(lockFiles, filenamePattern + "\\.log").get();
  String lockContent = gcsfsIHelper.readTextFile(bucketName, lockFileUri.getPath());
  assertThat(GSON.fromJson(lockContent, DeleteOperation.class).setLockExpiration(null))
      .isEqualTo(new DeleteOperation().setLockExpiration(null).setResource(dirUri.toString()));
  assertThat(gcsfsIHelper.readTextFile(bucketName, logFileUri.getPath()))
      .isEqualTo(dirUri.resolve(fileName) + "\n" + dirUri + "\n");
}