Python google.cloud.storage.Blob() Examples

The following are 29 code examples of google.cloud.storage.Blob(), drawn from open-source projects. Follow the links above each example to view the original project or source file. You may also want to check out all available functions and classes of the module google.cloud.storage.
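Before the project samples, here is a minimal hedged sketch of the constructor itself; the bucket and object names below are placeholders rather than values from any example:

from google.cloud import storage

client = storage.Client()
bucket = client.bucket("my-bucket")  # a local reference; no API request is made yet
blob = storage.Blob("path/to/object.txt", bucket)
blob.upload_from_string(b"hello")    # creates or overwrites the object
assert blob.exists(client)
print(blob.download_as_string())     # b'hello'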
Example #1
Source File: embedding.py    From tensorboardX with MIT License
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # 's3://' and 'gs://' have the same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]

        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path
        if prefix == 's3':
            import os
            import boto3
            s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)

        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()

            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path) 
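A hypothetical usage sketch (the bucket name is a placeholder; per the comment in the function, the file is expected to exist locally at the literal path gs:/my-bucket/runs/embedding.tsv):

maybe_upload_file('gs://my-bucket/runs/embedding.tsv')  # uploads, since the prefix matches
maybe_upload_file('/tmp/embedding.tsv')                 # no remote prefix, so a no-op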
Example #2
Source File: __init__.py    From jgscm with MIT License
def _save_directory(self, path, model):
        """Creates a directory in GCS."""
        exists, obj = self._fetch(path)
        if exists:
            if isinstance(obj, Blob):
                raise web.HTTPError(400, u"Not a directory: %s" % path)
            else:
                self.log.debug("Directory %r already exists", path)
                return
        bucket_name, bucket_path = self._parse_path(path)
        if bucket_path == "":
            self.client.create_bucket(bucket_name)
        else:
            bucket = self._get_bucket(bucket_name, throw=True)
            bucket.blob(bucket_path).upload_from_string(
                b"", content_type="application/x-directory") 
Example #3
Source File: server.py    From healthcare-deid with Apache License 2.0
def verify_gcs_path(path):
  """Verifies that a GCS path exists.

  Args:
    path: A string that represents the target path.
  Returns:
    A boolean of the verification status.
  """
  storage_client = storage.Client()
  path_info = gcsutil.GcsFileName.from_path(path)
  try:
    bucket = storage_client.get_bucket(path_info.bucket)
  except exceptions.NotFound:
    return False
  return storage.Blob(bucket=bucket,
                      name=path_info.blob).exists(storage_client) 
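A hypothetical call (the gs:// URI is a placeholder; gcsutil.GcsFileName.from_path is assumed to split it into bucket and blob parts, as the attribute access above implies):

if verify_gcs_path('gs://my-bucket/notes/input.txt'):
    print('Path exists.')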
Example #4
Source File: PrettyDataGenerator.py    From professional-services with Apache License 2.0
def write_n_line_file_to_gcs(project, temp_location, n):
    """
    Write an n-line file to the temp_location in Google Cloud Storage.
    Args:
        project: A string containing the GCP project-id.
        temp_location: A string specifying a GCS location to write to.
        n: An integer specifying the number of lines to write to a file.
    """
    # Prepare to write gcs file 'temp_num_records.txt' in the temp_location.
    bucket_name, path = temp_location.replace('gs://', '').split('/', 1)

    gcs_client = gcs.Client(project=project)
    temp_bucket = gcs_client.get_bucket(bucket_name)
    temp_blob = gcs.Blob(path + '/temp_num_records%s.txt' % uuid4(),
                         temp_bucket)

    # Write num_records newlines to a file_string. These will be our initial
    # PCollection elements.
    # This method was chosen because it proved more performant than
    # beam.Create for a large initial PCollection, and to take advantage of
    # distributed reads from GCS.
    file_string = '\n' * int(n)
    temp_blob.upload_from_string(file_string)
    return temp_blob 
Example #5
Source File: __init__.py    From jgscm with MIT License
def _read_file(self, blob, format):
        """Reads a non-notebook file.

        blob: instance of :class:`google.cloud.storage.Blob`.
        format:
          If "text", the contents will be decoded as UTF-8.
          If "base64", the raw bytes contents will be encoded as base64.
          If not specified, try to decode as UTF-8, and fall back to base64
        """
        bcontent = blob.download_as_string()

        if format is None or format == "text":
            # Try to interpret as unicode if format is unknown or if unicode
            # was explicitly requested.
            try:
                return bcontent.decode("utf8"), "text"
            except UnicodeError:
                if format == "text":
                    raise web.HTTPError(
                        400, "%s is not UTF-8 encoded" %
                             self._get_blob_path(blob),
                        reason="bad format",
                    )
        return base64.encodebytes(bcontent).decode("ascii"), "base64" 
Example #6
Source File: __init__.py    From jgscm with MIT License
def _get_blob_name(blob):
        """
        Gets blob name (last part of the path).
        :param blob: instance of :class:`google.cloud.storage.Blob`.
        :return: name string.
        """
        if isinstance(blob, Blob):
            return os.path.basename(blob.name)
        assert isinstance(blob, (unicode, str))
        if blob.endswith("/"):
            blob = blob[:-1]
        return os.path.basename(blob) 
Example #7
Source File: __init__.py    From jgscm with MIT License
def _get_blob_path(blob):
        """
        Gets blob path.
        :param blob: instance of :class:`google.cloud.storage.Blob`.
        :return: path string.
        """
        return blob.bucket.name + "/" + blob.name 
Example #8
Source File: __init__.py    From jgscm with MIT License
def get(self, path, content=True, type=None, format=None):
        if isinstance(path, Blob):
            obj = path
            path = self._get_blob_path(obj)
        elif path.startswith("/"):
            path = path[1:]
        if not path:
            path = self.default_path

        type = self._resolve_storagetype(path, type)
        if type == "directory":
            if path and not path.endswith("/"):
                path += "/"
            exists, members = self._fetch(path, content=content)
            if not exists:
                raise web.HTTPError(404, u"No such directory: %s" % path)
            model = self._dir_model(path, members, content=content)
        else:
            exists, blob = self._fetch(path)
            if not exists:
                raise web.HTTPError(404, u"No such file: %s" % path)
            if type == "notebook" or (type is None and path.endswith(".ipynb")):
                model = self._notebook_model(blob, content=content)
            else:
                model = self._file_model(blob, content=content, format=format)
        return model 
Example #9
Source File: snippets.py    From python-storage with Apache License 2.0
def delete_blob(to_delete):
    # [START delete_blob]
    from google.cloud.exceptions import NotFound

    client = storage.Client()
    bucket = client.get_bucket("my-bucket")
    blobs = list(bucket.list_blobs())
    assert len(blobs) > 0
    # [<Blob: my-bucket, my-file.txt>]
    bucket.delete_blob("my-file.txt")
    try:
        bucket.delete_blob("doesnt-exist")
    except NotFound:
        pass
    # [END delete_blob]

    blob = None
    # [START delete_blobs]
    bucket.delete_blobs([blob], on_error=lambda blob: None)
    # [END delete_blobs]

    to_delete.append(bucket) 
Example #10
Source File: gcloud.py    From django-storages with BSD 3-Clause "New" or "Revised" License
def url(self, name):
        """
        Return the public URL or a signed URL for the Blob.
        This DOES NOT check for the existence of the Blob - that would make
        the code too slow for many use cases.
        """
        name = self._normalize_name(clean_name(name))
        blob = self.bucket.blob(name)

        if not self.custom_endpoint and self.default_acl == 'publicRead':
            return blob.public_url
        elif self.default_acl == 'publicRead':
            return '{storage_base_url}/{quoted_name}'.format(
                storage_base_url=self.custom_endpoint,
                quoted_name=_quote(name, safe=b"/~"),
            )
        elif not self.custom_endpoint:
            return blob.generate_signed_url(self.expiration)
        else:
            return blob.generate_signed_url(
                expiration=self.expiration,
                api_access_endpoint=self.custom_endpoint,
            ) 
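A hedged usage sketch for the method above (assumes a django-storages GoogleCloudStorage backend configured via Django settings; the file name is illustrative):

from storages.backends.gcloud import GoogleCloudStorage

media_storage = GoogleCloudStorage()          # bucket and credentials come from settings
url = media_storage.url('reports/2020.pdf')   # public URL or signed URL, per default_acl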
Example #11
Source File: storage.py    From loaner with Apache License 2.0
def insert_blob(self, path, contents, bucket_name=None):
    """Inserts a new json encoded Blob in the Cloud Storage bucket provided.

    NOTE: If a Blob already exists at the provided path it will be overwritten
    by the new contents without warning.

    Args:
      path: str, the path of the Blob to create relative to the root of the
          Google Cloud Storage Bucket including the name of the Blob.
      contents: dict, a dictionary representing the contents of the new Blob.
      bucket_name: str, the name of the Google Cloud Storage Bucket to insert
          the new Blob into.
    """
    bucket_name = bucket_name or self._config.bucket

    blob = storage.Blob(
        name=path,
        bucket=self.get_bucket(bucket_name),
    )

    blob.upload_from_string(
        data=json.dumps(contents),
        content_type='application/json',
        client=self._client,
    )

    logging.info(
        'Successfully uploaded blob %r to bucket %r.', path, bucket_name) 
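As a counterpart to insert_blob, here is a hedged sketch of reading such a JSON Blob back; get_blob_contents is a hypothetical helper, but the storage.Blob and client calls mirror the example above:

def get_blob_contents(self, path, bucket_name=None):
    """Hypothetical helper: fetch and decode a JSON Blob written by insert_blob."""
    bucket_name = bucket_name or self._config.bucket
    blob = storage.Blob(name=path, bucket=self.get_bucket(bucket_name))
    return json.loads(blob.download_as_string(client=self._client))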
Example #12
Source File: output_manager.py    From turbinia with Apache License 2.0
def copy_to(self, source_path):
    if os.path.getsize(source_path) == 0:
      message = (
          'Local source file {0:s} is empty.  Not uploading to GCS'.format(
              source_path))
      log.error(message)
      raise TurbiniaException(message)

    bucket = self.client.get_bucket(self.bucket)
    destination_path = os.path.join(
        self.base_output_dir, self.unique_dir, os.path.basename(source_path))
    log.info(
        'Writing {0:s} to GCS path {1:s}'.format(source_path, destination_path))
    try:
      blob = storage.Blob(destination_path, bucket, chunk_size=self.CHUNK_SIZE)
      blob.upload_from_filename(source_path, client=self.client)
    except exceptions.GoogleCloudError as exception:
      message = 'File upload to GCS failed: {0!s}'.format(exception)
      log.error(message)
      raise TurbiniaException(message)
    return os.path.join('gs://', self.bucket, destination_path) 
Example #13
Source File: import_product_sets_test.py    From python-docs-samples with Apache License 2.0
def setup_teardown():
    # Create the product set csv file locally and upload it to GCS
    # This is so that there is a unique product set ID for all python version
    # tests.
    client = storage.Client(project=PROJECT_ID)
    bucket = client.get_bucket(PROJECT_ID)
    blob = storage.Blob("vision/{}.csv".format(FILENAME), bucket)
    blob.upload_from_string(
        '"gs://cloud-samples-data/vision/product_search/shoes_1.jpg",' +
        '"{}",'.format(IMAGE_URI_1) +
        '"{}",'.format(PRODUCT_SET_ID) +
        '"{}",'.format(PRODUCT_ID_1) +
        '"apparel",,"style=womens","0.1,0.1,0.9,0.1,0.9,0.9,0.1,0.9"')

    yield

    delete_product(PROJECT_ID, LOCATION, PRODUCT_ID_1)
    delete_product_set(PROJECT_ID, LOCATION, PRODUCT_SET_ID)
    # Delete the created file
    blob.delete(client) 
Example #14
Source File: test_system.py    From python-storage with Apache License 2.0
def test_copy_file_with_metageneration_match(self):
        new_bucket_name = "generation-match" + unique_resource_id("-")
        created = retry_429_503(Config.CLIENT.create_bucket)(
            new_bucket_name, requester_pays=True
        )
        self.case_buckets_to_delete.append(new_bucket_name)
        self.assertEqual(created.name, new_bucket_name)

        to_delete = []
        blob = storage.Blob("simple", bucket=created)
        blob.upload_from_string(b"DEADBEEF")
        to_delete.append(blob)
        try:
            dest_bucket = Config.CLIENT.bucket(new_bucket_name)

            new_blob = dest_bucket.copy_blob(
                blob,
                dest_bucket,
                "simple-copy",
                if_source_metageneration_match=blob.metageneration,
            )
            to_delete.append(new_blob)

            base_contents = blob.download_as_string()
            copied_contents = new_blob.download_as_string()
            self.assertEqual(base_contents, copied_contents)
        finally:
            for blob in to_delete:
                retry_429_harder(blob.delete)() 
Example #15
Source File: test_system.py    From python-storage with Apache License 2.0
def test_copy_file_with_generation_match(self):
        new_bucket_name = "generation-match" + unique_resource_id("-")
        created = retry_429_503(Config.CLIENT.create_bucket)(
            new_bucket_name, requester_pays=True
        )
        self.case_buckets_to_delete.append(new_bucket_name)
        self.assertEqual(created.name, new_bucket_name)

        to_delete = []
        blob = storage.Blob("simple", bucket=created)
        blob.upload_from_string(b"DEADBEEF")
        to_delete.append(blob)
        try:
            dest_bucket = Config.CLIENT.bucket(new_bucket_name)

            new_blob = dest_bucket.copy_blob(
                blob,
                dest_bucket,
                "simple-copy",
                if_source_generation_match=blob.generation,
            )
            to_delete.append(new_blob)

            base_contents = blob.download_as_string()
            copied_contents = new_blob.download_as_string()
            self.assertEqual(base_contents, copied_contents)
        finally:
            for blob in to_delete:
                retry_429_harder(blob.delete)() 
Example #16
Source File: gcs.py    From stoq-plugins-public with Apache License 2.0
def _upload(self, payload: bytes, filename: str, bucket: str) -> None:
        """
        Upload a payload to GCS

        """

        client = Client(project=self.project_id)
        count = 0
        while count < self.max_retries:
            try:
                bucket_obj = client.get_bucket(bucket)
                if self.use_encryption:
                    payload = self._encrypt(payload)
                content = BytesIO(payload)
                blob = Blob(filename, bucket_obj)
                blob.upload_from_file(content)
                break
            except (
                InvalidResponse,
                GoogleAPICallError,
                InternalServerError,
                SSLError,
            ) as e:
                count += 1
                # Increment before checking: in the original, the loop
                # condition guaranteed count < max_retries here, so the raise
                # was unreachable and the final failure was swallowed.
                if count >= self.max_retries:
                    raise StoqPluginException(
                        f'Failed to upload {bucket}/{filename} to GCS: {str(e)}'
                    )
                sleep(randrange(0, 4)) 
Example #17
Source File: benchwrapper.py    From python-storage with Apache License 2.0
def Read(self, request, context):
        bucket = client.bucket(request.bucketName)
        blob = storage.Blob(request.objectName, bucket)
        blob.download_as_string()
        return storage_pb2.EmptyResponse() 
Example #18
Source File: snippets.py    From python-storage with Apache License 2.0
def get_blob(to_delete):
    from google.cloud.storage.blob import Blob

    # [START get_blob]
    client = storage.Client()
    bucket = client.get_bucket("my-bucket")
    assert isinstance(bucket.get_blob("/path/to/blob.txt"), Blob)
    # <Blob: my-bucket, /path/to/blob.txt>
    assert not bucket.get_blob("/does-not-exist.txt")
    # None
    # [END get_blob]

    to_delete.append(bucket) 
Example #19
Source File: snippets.py    From python-storage with Apache License 2.0
def upload_from_file(to_delete):
    # [START upload_from_file]
    from google.cloud.storage import Blob

    client = storage.Client(project="my-project")
    bucket = client.get_bucket("my-bucket")
    encryption_key = "aa426195405adee2c8081bb9e7e74b19"
    blob = Blob("secure-data", bucket, encryption_key=encryption_key)
    with open("my-file", "rb") as my_file:
        blob.upload_from_file(my_file)
    # [END upload_from_file]

    to_delete.append(blob) 
Example #20
Source File: snippets.py    From python-storage with Apache License 2.0
def download_to_file(to_delete):
    # [START download_to_file]
    from google.cloud.storage import Blob

    client = storage.Client(project="my-project")
    bucket = client.get_bucket("my-bucket")
    encryption_key = "c7f32af42e45e85b9848a6a14dd2a8f6"
    blob = Blob("secure-data", bucket, encryption_key=encryption_key)
    blob.upload_from_string("my secret message.")
    with open("/tmp/my-secure-file", "wb") as file_obj:
        blob.download_to_file(file_obj)
    # [END download_to_file]

    to_delete.append(blob) 
Example #21
Source File: psi.py    From python-script with Apache License 2.0
def save(url, report):
    '''Save to https://console.cloud.google.com/storage/browser/[bucket-id]/'''
    client = storage.Client()
    bucket = client.get_bucket("psi-report")
    blob = Blob(f"${parse.quote_plus(url)}.json", bucket)
    blob.upload_from_string(report, "application/json") 
Example #22
Source File: test_system.py    From python-storage with Apache License 2.0
def test_write_metadata(self):
        filename = self.FILES["logo"]["path"]
        blob_name = os.path.basename(filename)

        blob = storage.Blob(blob_name, bucket=self.bucket)
        blob.upload_from_filename(filename)
        self.case_blobs_to_delete.append(blob)

        # NOTE: This should not be necessary. We should be able to pass
        #       it in to upload_file and also to upload_from_string.
        blob.content_type = "image/png"
        self.assertEqual(blob.content_type, "image/png") 
Example #23
Source File: test_system.py    From python-storage with Apache License 2.0
def test_copy_existing_file(self):
        filename = self.FILES["logo"]["path"]
        blob = storage.Blob("CloudLogo", bucket=self.bucket)
        blob.upload_from_filename(filename)
        self.case_blobs_to_delete.append(blob)

        new_blob = retry_bad_copy(self.bucket.copy_blob)(
            blob, self.bucket, "CloudLogoCopy"
        )
        self.case_blobs_to_delete.append(new_blob)

        base_contents = blob.download_as_string()
        copied_contents = new_blob.download_as_string()
        self.assertEqual(base_contents, copied_contents) 
Example #24
Source File: test_system.py    From python-storage with Apache License 2.0
def setUpClass(cls):
        super(TestStorageListFiles, cls).setUpClass()
        # Make sure bucket empty before beginning.
        _empty_bucket(cls.bucket)

        logo_path = cls.FILES["logo"]["path"]
        blob = storage.Blob(cls.FILENAMES[0], bucket=cls.bucket)
        blob.upload_from_filename(logo_path)
        cls.suite_blobs_to_delete = [blob]

        # Copy main blob onto remaining in FILENAMES.
        for filename in cls.FILENAMES[1:]:
            new_blob = retry_bad_copy(cls.bucket.copy_blob)(blob, cls.bucket, filename)
            cls.suite_blobs_to_delete.append(new_blob) 
Example #25
Source File: bucket_mover_service.py    From professional-services with Apache License 2.0
def _lock_down_bucket(spinner, cloud_logger, bucket, lock_file_name,
                      service_account_email):
    """Change the ACL/IAM on the bucket so that only the service account can access it.

    Args:
        spinner: The spinner displayed in the console
        cloud_logger: A GCP logging client instance
        bucket: The bucket object to lock down
        lock_file_name: The name of the lock file
        service_account_email: The email of the service account
    """

    if storage.Blob(lock_file_name, bucket).exists():
        spinner.fail('X')
        msg = 'The lock file exists in the source bucket, so we cannot continue'
        cloud_logger.log_text(msg)
        raise SystemExit(msg)

    spinner.ok(_CHECKMARK)
    msg = 'Locking down the bucket by revoking all ACLs/IAM policies'
    spinner.text = msg
    cloud_logger.log_text(msg)

    # Turn off any bucket ACLs
    bucket.acl.save_predefined('private')

    # Revoke all IAM access and only set the service account as an admin
    policy = api_core_iam.Policy()
    policy['roles/storage.admin'].add('serviceAccount:' + service_account_email)
    bucket.set_iam_policy(policy) 
Example #26
Source File: gcloud.py    From django-storages with BSD 3-Clause "New" or "Revised" License
def __init__(self, name, mode, storage):
        self.name = name
        self.mime_type = mimetypes.guess_type(name)[0]
        self._mode = mode
        self._storage = storage
        self.blob = storage.bucket.get_blob(name)
        if not self.blob and 'w' in mode:
            self.blob = Blob(
                self.name, storage.bucket,
                chunk_size=storage.blob_chunk_size)
        self._file = None
        self._is_dirty = False 
Example #27
Source File: gcp_storage_object.py    From google.cloud with GNU General Public License v3.0
def download_file(module, client, name, dest):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(name, bucket)
        with open(dest, "wb") as file_obj:
            blob.download_to_file(file_obj)
        return blob_to_dict(blob)
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e)) 
Example #28
Source File: gcp_storage_object.py    From google.cloud with GNU General Public License v3.0
def upload_file(module, client, src, dest):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(dest, bucket)
        with open(src, "r") as file_obj:
            blob.upload_from_file(file_obj)
        return blob_to_dict(blob)
    except google.cloud.exceptions.GoogleCloudError as e:
        module.fail_json(msg=str(e)) 
Example #29
Source File: gcp_storage_object.py    From google.cloud with GNU General Public License v3.0
def delete_file(module, client, name):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(name, bucket)
        blob.delete()
        return {}
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e))