Python google.cloud.storage.Bucket() Examples

The following are 27 code examples of google.cloud.storage.Bucket(), drawn from open-source projects. You can go to the original project or source file noted above each example, or check out the other available functions and classes of the google.cloud.storage module.
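Before the project samples, here is a minimal, self-contained sketch of the typical Bucket workflow. It is an orientation example only; the bucket name, object names, and location are placeholders rather than values taken from any project below.

from google.cloud import storage

client = storage.Client()  # authenticates via Application Default Credentials

# Bucket() only builds a local handle; no API request is made yet.
bucket = storage.Bucket(client, "example-bucket-name")

if not bucket.exists():
    bucket.create(location="us-central1")

# Upload a local file as an object, then list what the bucket contains.
blob = bucket.blob("reports/2020/summary.txt")
blob.upload_from_filename("summary.txt")
for existing_blob in bucket.list_blobs(prefix="reports/"):
    print(existing_blob.name)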
Example #1
Source File: bucket_mover_service.py    From professional-services with Apache License 2.0
def _print_config_details(cloud_logger, config):
    """Print out the pertinent project/bucket details

    Args:
        cloud_logger: A GCP logging client instance
        config: A Configuration object with all of the config values needed for the script to run
    """
    _print_and_log(cloud_logger,
                   'Source Project: {}'.format(config.source_project))
    _print_and_log(cloud_logger, 'Source Bucket: {}'.format(config.bucket_name))
    _print_and_log(cloud_logger, 'Source Service Account: {}'.format(
        config.source_project_credentials.service_account_email))  # pylint: disable=no-member
    _print_and_log(cloud_logger,
                   'Target Project: {}'.format(config.target_project))
    _print_and_log(cloud_logger,
                   'Target Bucket: {}'.format(config.target_bucket_name))
    _print_and_log(cloud_logger, 'Target Service Account: {}'.format(
        config.target_project_credentials.service_account_email))  # pylint: disable=no-member 
Example #2
Source File: embedding.py    From tensorboardX with MIT License
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]

        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path
        if prefix == 's3':
            import os
            import boto3
            s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)

        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()

            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path) 
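For orientation, a hypothetical call is sketched below. The helper takes the remote URL as its argument and, per the comment in the code, assumes the file was actually written locally under the collapsed path (for example gs:/my-bucket/run1/events.out); all names and the endpoint here are invented.

import os

# Hypothetical S3-compatible endpoint used by the s3:// branch; omit for real AWS S3.
os.environ.setdefault('S3_ENDPOINT', 'http://localhost:9000')

maybe_upload_file('s3://my-bucket/run1/events.out')
maybe_upload_file('gs://my-bucket/run1/events.out')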
Example #3
Source File: system.py    From python-bigquery with Apache License 2.0
def tearDown(self):
        def _still_in_use(bad_request):
            return any(
                error["reason"] == "resourceInUse" for error in bad_request._errors
            )

        retry_in_use = RetryErrors(BadRequest, error_predicate=_still_in_use)
        retry_storage_errors_conflict = RetryErrors(
            (Conflict, TooManyRequests, InternalServerError, ServiceUnavailable)
        )
        for doomed in self.to_delete:
            if isinstance(doomed, storage.Bucket):
                retry_storage_errors_conflict(doomed.delete)(force=True)
            elif isinstance(doomed, (Dataset, bigquery.DatasetReference)):
                retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True)
            elif isinstance(doomed, (Table, bigquery.TableReference)):
                retry_in_use(Config.CLIENT.delete_table)(doomed)
            else:
                doomed.delete() 
Example #4
Source File: gcs.py    From whisper-backup with Apache License 2.0
def __init__(self, bucket, project="", region="us", noop=False):
        """Setup the GCS storage backend with the bucket we will use and
           optional region."""
        if project == "":
            self.client = storage.Client()
        else:
            self.client = storage.Client(project)

        self.noop = noop

        self.bucket = storage.Bucket(self.client, bucket)
        self.bucket.location = region
        self.bucket.storage_class = "STANDARD"

        # Create the bucket if it doesn't exist
        if not self.bucket.exists():
            if not noop:
                self.bucket.create()
            else:
                logger.info("No-Op: Create bucket: %s" % bucket) 
Example #5
Source File: gcs.py    From airflow with Apache License 2.0
def get_blob_update_time(self, bucket_name, object_name):
        """
        Get the update time of a file in Google Cloud Storage

        :param bucket_name: The Google Cloud Storage bucket where the object is.
        :type bucket_name: str
        :param object_name: The name of the blob to get updated time from the Google cloud
            storage bucket.
        :type object_name: str
        """
        client = self.get_conn()
        bucket = client.bucket(bucket_name)
        blob = bucket.get_blob(blob_name=object_name)
        if blob is None:
            raise ValueError("Object ({}) not found in Bucket ({})".format(
                object_name, bucket_name))
        return blob.updated 
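A hypothetical call through the hook (assuming the google provider package layout used by Airflow at the time; bucket and object names invented):

from airflow.providers.google.cloud.hooks.gcs import GCSHook

hook = GCSHook()  # uses the default google_cloud_default connection
updated = hook.get_blob_update_time('my-data-bucket', 'exports/2020-05-01.csv')
print(updated)  # datetime of the object's last update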
Example #6
Source File: gcs.py    From airflow with Apache License 2.0
def delete_bucket(self, bucket_name: str, force: bool = False):
        """
        Delete a bucket from Google Cloud Storage.

        :param bucket_name: name of the bucket which will be deleted
        :type bucket_name: str
        :param force: if False, deleting a non-empty bucket fails; set force=True to
            delete a non-empty bucket together with its contents
        :type force: bool
        """

        client = self.get_conn()
        bucket = client.bucket(bucket_name)

        self.log.info("Deleting %s bucket", bucket_name)
        try:
            bucket.delete(force=force)
            self.log.info("Bucket %s has been deleted", bucket_name)
        except NotFound:
            self.log.info("Bucket %s not exists", bucket_name) 
Example #7
Source File: gcs_transcript_utils.py    From professional-services with Apache License 2.0
def get_files(client: storage.Client,
              bucket: storage.Bucket) -> List[dict]:
    """Retrieves all files in a given GCS bucket

    Args:
        client: Object representing Python GCS client
        bucket: google.cloud.storage.Bucket holding bucket name

    Returns:
        A list of dicts, one per blob, of the form
        {'name': <file name>, 'type': <content type, e.g. 'audio/flac'>}.
    """
    bucket = client.get_bucket(bucket)
    return [{'name': blob.name,
             'type': blob.content_type} for blob in list(bucket.list_blobs())] 
Example #8
Source File: conftest.py    From django-gcloud-storage with BSD 3-Clause "New" or "Revised" License
def storage(request):
    # create a random test bucket name
    bucket_name = "test_bucket_" + get_random_string(6, string.ascii_lowercase)

    storage = DjangoGCloudStorage(
        project=request.config.getoption("--gcs-project-name"),
        bucket=bucket_name,
        credentials_file_path=request.config.getoption("--gcs-credentials-file")
    )

    # Make sure the bucket exists
    bucket = Bucket(storage.client, bucket_name)
    bucket.create(
        location=request.config.getoption("--gcs-bucket-location")
    )

    yield storage

    storage.bucket.delete_blobs(storage.bucket.list_blobs())

    storage.bucket.delete(force=True) 
Example #9
Source File: snippets.py    From python-storage with Apache License 2.0
def create_bucket(client, to_delete):
    from google.cloud.storage import Bucket

    # [START create_bucket]
    bucket = client.create_bucket("my-bucket")
    assert isinstance(bucket, Bucket)
    # <Bucket: my-bucket>
    # [END create_bucket]

    to_delete.append(bucket) 
Example #10
Source File: gs_client.py    From spotty with MIT License
def create_bucket(self, bucket_name: str, region: str) -> Bucket:
        bucket = Bucket(self._client, name=bucket_name)
        bucket.create(location=region)

        return bucket 
Example #11
Source File: snippets.py    From python-bigquery with Apache License 2.0
def to_delete(client):
    doomed = []
    yield doomed
    for item in doomed:
        if isinstance(item, (bigquery.Dataset, bigquery.DatasetReference)):
            retry_429(client.delete_dataset)(item, delete_contents=True)
        elif isinstance(item, storage.Bucket):
            retry_storage_errors(item.delete)()
        else:
            retry_429(item.delete)() 
Example #12
Source File: gcs_transcript_utils.py    From professional-services with Apache License 2.0
def find_bucket_with_prefix(bucket_iter: Iterator[Bucket],
                            prefix: str) -> Union[str, None]:
    """"Finds bucket in a project based on bucket prefix.

    Args:
        bucket_iter: Iterator of google.cloud.storage.Bucket instances
        prefix: Bucket name prefix to search for

    Returns:
        Bucket name with the specified prefix.

    Raises:
        NotFound: If no bucket with the given prefix exists.
    """
    for bucket in bucket_iter:
        if bucket.name.startswith(prefix):
            return bucket.name
    raise NotFound(f'No bucket found with prefix: {prefix}') 
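A hypothetical call, feeding the function the client's bucket iterator (the prefix here is invented):

from google.cloud import storage

client = storage.Client()
# list_buckets() yields google.cloud.storage.Bucket instances for the current project.
bucket_name = find_bucket_with_prefix(client.list_buckets(), 'transcript-staging-')
print(bucket_name)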
Example #13
Source File: test_app_gcs_transcript_utils.py    From professional-services with Apache License 2.0
def test_find_bucket_found_with_prefix(self):
        """Tests that bucket is found when given prefix."""
        bucket_foo = storage.Bucket(self.gcs_client, self.foo_name)
        bucket_bar = storage.Bucket(self.gcs_client, self.bar_name)
        bucket_iterator = iter([bucket_foo, bucket_bar])
        actual_output = gcs_transcript_utils.find_bucket_with_prefix(
            bucket_iterator,
            self.foo_name)
        expected_output = self.foo_name
        self.assertEqual(actual_output, expected_output) 
Example #14
Source File: test_app_gcs_transcript_utils.py    From professional-services with Apache License 2.0
def test_find_bucket_not_found_with_prefix(self):
        """Tests that exception is raised if bucket is not found.S"""
        bucket_foo = storage.Bucket(self.gcs_client, self.foo_name)
        bucket_iterator = iter([bucket_foo])
        self.assertRaises(NotFound,
                          gcs_transcript_utils.find_bucket_with_prefix,
                          bucket_iterator,
                          self.bar_name) 
Example #15
Source File: bucket_mover_service.py    From professional-services with Apache License 2.0
def main(config, parsed_args, cloud_logger):
    """Main entry point for the bucket mover tool

    Args:
        config: A Configuration object with all of the config values needed for the script to run
        parsed_args: the configargparser parsing of command line options
        cloud_logger: A GCP logging client instance
    """

    cloud_logger.log_text("Starting GCS Bucket Mover")
    _print_config_details(cloud_logger, config)

    source_bucket = config.source_storage_client.lookup_bucket(  # pylint: disable=no-member
        config.bucket_name)

    # Get copies of all of the source bucket's IAM, ACLs and settings so they
    # can be copied over to the target project bucket
    source_bucket_details = bucket_details.BucketDetails(
        conf=parsed_args, source_bucket=source_bucket)

    _check_bucket_lock(cloud_logger, config, source_bucket,
                       source_bucket_details)

    sts_client = discovery.build(
        'storagetransfer', 'v1', credentials=config.target_project_credentials)

    if config.is_rename:
        _rename_bucket(cloud_logger, config, source_bucket,
                       source_bucket_details, sts_client)
    else:
        _move_bucket(cloud_logger, config, source_bucket, source_bucket_details,
                     sts_client)

    cloud_logger.log_text('Completed GCS Bucket Mover') 
Example #16
Source File: bucket_mover_service.py    From professional-services with Apache License 2.0
def _create_target_bucket(cloud_logger, config, source_bucket_details,
                          bucket_name):
    """Creates either the temp bucket or target bucket (during rename) in the target project

    Args:
        cloud_logger: A GCP logging client instance
        config: A Configuration object with all of the config values needed for the script to run
        source_bucket_details: The details copied from the source bucket that is being moved
        bucket_name: The name of the bucket to create

    Returns:
        The bucket object that has been created in GCS
    """

    if config.is_rename:
        spinner_text = 'Creating target bucket'
    else:
        spinner_text = 'Creating temp target bucket'

    cloud_logger.log_text(spinner_text)
    with yaspin(text=spinner_text) as spinner:
        target_bucket = _create_bucket(spinner, cloud_logger, config,
                                       bucket_name, source_bucket_details)
        _write_spinner_and_log(
            spinner, cloud_logger,
            'Bucket {} created in target project {}'.format(
                bucket_name, config.target_project))
        return target_bucket 
Example #17
Source File: snippets.py    From python-storage with Apache License 2.0
def lookup_bucket(client, to_delete):
    from google.cloud.storage.bucket import Bucket

    # [START lookup_bucket]
    bucket = client.lookup_bucket("doesnt-exist")
    assert not bucket
    # None
    bucket = client.lookup_bucket("my-bucket")
    assert isinstance(bucket, Bucket)
    # <Bucket: my-bucket>
    # [END lookup_bucket]

    to_delete.append(bucket) 
Example #18
Source File: gs_client.py    From spotty with MIT License
def create_dir(self, bucket_name: str, path: str):
        bucket = Bucket(self._client, name=bucket_name)
        blob = bucket.blob(path.rstrip('/') + '/')
        blob.upload_from_string('') 
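GCS has no real directories; the zero-byte object with a trailing slash created above is just the convention most tools use to make a "folder" visible. A standalone equivalent, with an invented bucket name:

from google.cloud import storage

client = storage.Client()
bucket = storage.Bucket(client, 'my-workspace-bucket')
# A zero-byte object whose name ends in '/' acts as a directory marker.
bucket.blob('models/checkpoints/').upload_from_string('')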
Example #19
Source File: actions_lib.py    From incubator-dlab with Apache License 2.0
def create_bucket(self, bucket_name):
        try:
            bucket = self.storage_client.create_bucket(bucket_name)
            print('Bucket {} created.'.format(bucket.name))
        except Exception as err:
            logging.info(
                "Unable to create Bucket: " + str(err) + "\n Traceback: " + traceback.format_exc())
            append_result(str({"error": "Unable to create Bucket",
                               "error_message": str(err) + "\n Traceback: " + traceback.format_exc()}))
            traceback.print_exc(file=sys.stdout) 
Example #20
Source File: gs_client.py    From spotty with MIT License
def list_buckets(self) -> List[Bucket]:
        res = list(self._client.list_buckets())
        return res 
Example #21
Source File: http.py    From open-raadsinformatie with MIT License
def get_bucket(self):
        """Get the bucket defined by 'bucket_name' from the storage_client.
        Throws a ValueError when bucket_name is not set. If the bucket does not
        exist in GCS, a new bucket will be created.
        """
        if self._bucket:
            return self._bucket

        if not self.bucket_name:
            raise ValueError("The 'bucket_name' needs to be set.")

        try:
            self._bucket = self.storage_client.get_bucket(self.bucket_name)
        except (exceptions.NotFound, exceptions.Forbidden):
            bucket = storage.Bucket(self.storage_client, name=self.bucket_name)
            bucket.versioning_enabled = True
            bucket.lifecycle_rules = [{
                'action': {'type': 'SetStorageClass', 'storageClass': 'NEARLINE'},
                'condition': {
                    'numNewerVersions': 1,
                    'matchesStorageClass': ['REGIONAL', 'STANDARD'],
                    'age': 30
                }
            }]
            try:
                bucket.create(location='europe-west4')
            except exceptions.Conflict:
                raise
            self._bucket = self.storage_client.get_bucket(self.bucket_name)

        return self._bucket 
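Recent versions of google-cloud-storage also offer helper methods for building the same lifecycle configuration; a rough equivalent of the rule above is sketched below (the bucket name is invented, and the helper is worth verifying against the library version you have installed):

from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket('my-archive-bucket')  # hypothetical existing bucket
bucket.versioning_enabled = True
# Move non-current REGIONAL/STANDARD objects older than 30 days to NEARLINE.
bucket.add_lifecycle_set_storage_class_rule(
    'NEARLINE',
    age=30,
    number_of_newer_versions=1,
    matches_storage_class=['REGIONAL', 'STANDARD'],
)
bucket.patch()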
Example #22
Source File: gcs.py    From airflow with Apache License 2.0
def _prepare_sync_plan(
        source_bucket: storage.Bucket,
        destination_bucket: storage.Bucket,
        source_object: Optional[str],
        destination_object: Optional[str],
        recursive: bool,
    ) -> Tuple[Set[storage.Blob], Set[storage.Blob], Set[storage.Blob]]:
        # Calculate the number of characters to strip from each blob name, because that leading
        # part only carries the parent path
        source_object_prefix_len = len(source_object) if source_object else 0
        destination_object_prefix_len = len(destination_object) if destination_object else 0
        delimiter = "/" if not recursive else None
        # Fetch blobs list
        source_blobs = list(source_bucket.list_blobs(prefix=source_object, delimiter=delimiter))
        destination_blobs = list(
            destination_bucket.list_blobs(prefix=destination_object, delimiter=delimiter))
        # Create indexes that allow you to identify blobs based on their name
        source_names_index = {a.name[source_object_prefix_len:]: a for a in source_blobs}
        destination_names_index = {a.name[destination_object_prefix_len:]: a for a in destination_blobs}
        # Create sets with names without parent object name
        source_names = set(source_names_index.keys())
        destination_names = set(destination_names_index.keys())
        # Determine objects to copy and delete
        to_copy = source_names - destination_names
        to_delete = destination_names - source_names
        to_copy_blobs = {source_names_index[a] for a in to_copy}  # type: Set[storage.Blob]
        to_delete_blobs = {destination_names_index[a] for a in to_delete}  # type: Set[storage.Blob]
        # Find names that are in both buckets
        names_to_check = source_names.intersection(destination_names)
        to_rewrite_blobs = set()  # type: Set[storage.Blob]
        # Compare objects based on their crc32c checksum
        for current_name in names_to_check:
            source_blob = source_names_index[current_name]
            destination_blob = destination_names_index[current_name]
            # if the objects are different, save it
            if source_blob.crc32c != destination_blob.crc32c:
                to_rewrite_blobs.add(source_blob)
        return to_copy_blobs, to_delete_blobs, to_rewrite_blobs 
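The three sets returned above are then typically applied by copying new blobs, refreshing changed ones, and deleting stale ones. A rough follow-up sketch (not the actual Airflow implementation, and assuming no source or destination object prefixes) might look like:

def apply_sync_plan(source_bucket, destination_bucket,
                    to_copy_blobs, to_delete_blobs, to_rewrite_blobs):
    # Sketch only: copy new and changed blobs across, then remove stale ones.
    for blob in to_copy_blobs | to_rewrite_blobs:
        source_bucket.copy_blob(blob, destination_bucket, blob.name)
    for blob in to_delete_blobs:
        blob.delete()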
Example #23
Source File: test_gcs.py    From airflow with Apache License 2.0
def test_create_bucket_with_resource(self, mock_service, mock_bucket):
        test_bucket = 'test_bucket'
        test_project = 'test-project'
        test_location = 'EU'
        test_labels = {'env': 'prod'}
        test_storage_class = 'MULTI_REGIONAL'
        test_versioning_enabled = {"enabled": True}

        mock_service.return_value.bucket.return_value.create.return_value = None
        mock_bucket.return_value.storage_class = test_storage_class
        mock_bucket.return_value.labels = test_labels
        mock_bucket.return_value.versioning_enabled = True

        sample_bucket = mock_service().bucket(bucket_name=test_bucket)

        # sample_bucket = storage.Bucket(client=mock_service, name=test_bucket)
        # Assert for resource other than None.
        response = self.gcs_hook.create_bucket(
            bucket_name=test_bucket,
            resource={"versioning": test_versioning_enabled},
            storage_class=test_storage_class,
            location=test_location,
            labels=test_labels,
            project_id=test_project
        )
        self.assertEqual(response, sample_bucket.id)

        mock_service.return_value.bucket.return_value._patch_property.assert_called_once_with(
            name='versioning', value=test_versioning_enabled
        )

        mock_service.return_value.bucket.return_value.create.assert_called_once_with(
            project=test_project, location=test_location
        ) 
Example #24
Source File: record_writer.py    From tensorboardX with MIT License
def __init__(self, path):
        if not GCS_ENABLED:
            raise ImportError("`google-cloud-storage` must be installed in order to use "
                              "the 'gs://' protocol")

        self.path = path
        self.buffer = io.BytesIO()

        client = storage.Client()
        bucket_name, filepath = self.bucket_and_path()
        bucket = storage.Bucket(client, bucket_name)
        self.blob = storage.Blob(filepath, bucket) 
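Only the constructor is shown here; the rest of the writer presumably appends serialized records to self.buffer and flushes them to the blob. A self-contained illustration of that flush step, with invented bucket and object names:

import io
from google.cloud import storage

client = storage.Client()
bucket = storage.Bucket(client, 'my-log-bucket')
blob = storage.Blob('runs/run1/events.out.tfevents', bucket)

buffer = io.BytesIO()
buffer.write(b'...serialized event records...')
# rewind=True seeks back to the start of the buffer before uploading its contents.
blob.upload_from_file(buffer, rewind=True)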
Example #25
Source File: actions_lib.py    From incubator-dlab with Apache License 2.0
def remove_bucket(self, bucket_name):
        try:
            GCPActions().bucket_cleanup(bucket_name)
            storage_resource = storage.Bucket(self.storage_client, bucket_name)
            storage_resource.delete(force=True)
            print('Bucket {} removed.'.format(bucket_name))
        except Exception as err:
            logging.info(
                "Unable to remove Bucket: " + str(err) + "\n Traceback: " + traceback.format_exc())
            append_result(str({"error": "Unable to remove Bucket",
                               "error_message": str(err) + "\n Traceback: " + traceback.format_exc()}))
            traceback.print_exc(file=sys.stdout) 
Example #26
Source File: actions_lib.py    From incubator-dlab with Apache License 2.0
def add_bucket_labels(self, bucket_name, tags):
        try:
            bucket = self.storage_client.get_bucket(bucket_name)
            labels = bucket.labels
            labels.update(tags)
            bucket.labels = labels
            bucket.patch()
            print('Updated labels on {}.'.format(bucket_name))
        except Exception as err:
            logging.info(
                "Unable to add labels to Bucket: " + str(err) + "\n Traceback: " + traceback.format_exc())
            append_result(str({"error": "Unable to add labels to Bucket",
                               "error_message": str(err) + "\n Traceback: " + traceback.format_exc()}))
            traceback.print_exc(file=sys.stdout) 
Example #27
Source File: bucket_mover_tester.py    From professional-services with Apache License 2.0
def set_up_test_bucket(config, parsed_args):
    """Sets up the test bucket, adds objects and assigns various settings.

    It makes sure none of the buckets already exist, and then runs the main bucket mover service.

    Args:
        config: A Configuration object with all of the config values needed for the script to run
        parsed_args: the configargparser parsing of command line options

    Returns:
        The name of the randomly generated bucket
    """

    random_bucket_name = _get_random_bucket_name()
    config.temp_bucket_name = random_bucket_name + '-temp'

    with yaspin(text='TESTING: Cleanup source bucket') as spinner:
        try:
            _check_bucket_exists_and_delete(
                spinner, config.source_storage_client, random_bucket_name,
                config.source_project)
        except exceptions.Forbidden:
            try:
                # Maybe the bucket already exists in the target project.
                _check_bucket_exists_and_delete(
                    spinner, config.target_storage_client, random_bucket_name,
                    config.target_project)
            except exceptions.Forbidden:
                spinner.write('TESTING: Not allowed to access bucket {}'.format(
                    random_bucket_name))
                spinner.fail('X')
                raise SystemExit()

        source_bucket = create_bucket(config.source_storage_client,
                                      random_bucket_name, parsed_args)
        spinner.write(
            '{} TESTING: Bucket {} created in source project {}'.format(
                _CHECKMARK, random_bucket_name, config.source_project))

    _upload_blobs(source_bucket)

    with yaspin(text='TESTING: Cleanup target bucket') as spinner:
        _check_bucket_exists_and_delete(spinner, config.target_storage_client,
                                        config.temp_bucket_name,
                                        config.target_project)
    print()
    return random_bucket_name