Python google.cloud.storage.Blob() Examples

The following are 30 code examples showing how to use google.cloud.storage.Blob(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module google.cloud.storage, or try the search function.

Example 1
Project: jgscm   Author: src-d   File: __init__.py    License: MIT License 6 votes vote down vote up
def _get_blob_name(blob):
        """
        Gets blob name (last part of the path).
        :param blob: instance of :class:`google.cloud.storage.Blob`.
        :return: name string.
        """
        if isinstance(blob, Blob):
            return os.path.basename(blob.name)
        assert isinstance(blob, (unicode, str))
        if blob.endswith("/"):
            blob = blob[:-1]
        return os.path.basename(blob) 
Example 2
Project: jgscm   Author: src-d   File: __init__.py    License: MIT License 6 votes vote down vote up
def _read_file(self, blob, format):
        """Reads a non-notebook file.

        blob: instance of :class:`google.cloud.storage.Blob`.
        format:
          If "text", the contents will be decoded as UTF-8.
          If "base64", the raw bytes contents will be encoded as base64.
          If not specified, try to decode as UTF-8, and fall back to base64
        """
        bcontent = blob.download_as_string()

        if format is None or format == "text":
            # Try to interpret as unicode if format is unknown or if unicode
            # was explicitly requested.
            try:
                return bcontent.decode("utf8"), "text"
            except UnicodeError:
                if format == "text":
                    raise web.HTTPError(
                        400, "%s is not UTF-8 encoded" %
                             self._get_blob_path(blob),
                        reason="bad format",
                    )
        return base64.encodebytes(bcontent).decode("ascii"), "base64" 
Example 3
Project: jgscm   Author: src-d   File: __init__.py    License: MIT License 6 votes vote down vote up
def _save_directory(self, path, model):
    """Creates a directory placeholder in GCS (bucket or zero-byte object)."""
    exists, obj = self._fetch(path)
    if exists:
        if not isinstance(obj, Blob):
            # Already present as a directory; nothing to do.
            self.log.debug("Directory %r already exists", path)
            return
        raise web.HTTPError(400, u"Not a directory: %s" % path)
    bucket_name, bucket_path = self._parse_path(path)
    if not bucket_path:
        # Top-level "directory" is an actual bucket.
        self.client.create_bucket(bucket_name)
        return
    bucket = self._get_bucket(bucket_name, throw=True)
    placeholder = bucket.blob(bucket_path)
    placeholder.upload_from_string(
        b"", content_type="application/x-directory")
Example 4
Project: tensorboardX   Author: lanpa   File: embedding.py    License: MIT License 6 votes vote down vote up
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if not local_path.startswith(('s3://', 'gs://')):
        return
    scheme = local_path.split(':')[0]
    # Both "s3://" and "gs://" are five characters long.
    bucket_and_key = local_path[len("s3://"):]
    bucket_name, _, object_key = bucket_and_key.partition("/")

    # s3://example/file becomes s3:/example/file in Linux
    local_path = scheme + ':/' + bucket_and_key
    if scheme == 's3':
        import boto3
        s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
        s3.upload_file(local_path, bucket_name, object_key)
    elif scheme == 'gs':
        from google.cloud import storage
        client = storage.Client()
        bucket = storage.Bucket(client, bucket_name)
        storage.Blob(object_key, bucket).upload_from_filename(local_path)
Example 5
Project: tensorboardX   Author: lanpa   File: embedding.py    License: MIT License 6 votes vote down vote up
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    for prefix in ('s3', 'gs'):
        if local_path.startswith(prefix + '://'):
            break
    else:
        # Not a remote path; nothing to upload.
        return

    remote = local_path[len('s3://'):]  # both prefixes have equal length
    bucket = remote.split('/')[0]
    path = remote[len(bucket) + 1:]

    # s3://example/file becomes s3:/example/file in Linux
    local_path = '{}:/{}'.format(prefix, remote)
    if prefix == 's3':
        import boto3
        s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
        s3.upload_file(local_path, bucket, path)
    elif prefix == 'gs':
        from google.cloud import storage
        gcs_client = storage.Client()
        gcs_bucket = storage.Bucket(gcs_client, bucket)
        gcs_blob = storage.Blob(path, gcs_bucket)
        gcs_blob.upload_from_filename(local_path)
Example 6
Project: turbinia   Author: google   File: output_manager.py    License: Apache License 2.0 6 votes vote down vote up
def copy_to(self, source_path):
    """Uploads a local file to this output writer's GCS location.

    Raises TurbiniaException for empty source files or GCS upload errors;
    returns the resulting gs:// path on success.
    """
    # Refuse to upload zero-byte files.
    if not os.path.getsize(source_path):
        message = (
            'Local source file {0:s} is empty.  Not uploading to GCS'.format(
                source_path))
        log.error(message)
        raise TurbiniaException(message)

    destination_path = os.path.join(
        self.base_output_dir, self.unique_dir, os.path.basename(source_path))
    bucket = self.client.get_bucket(self.bucket)
    log.info(
        'Writing {0:s} to GCS path {1:s}'.format(source_path, destination_path))
    try:
        blob = storage.Blob(destination_path, bucket, chunk_size=self.CHUNK_SIZE)
        blob.upload_from_filename(source_path, client=self.client)
    except exceptions.GoogleCloudError as exception:
        message = 'File upload to GCS failed: {0!s}'.format(exception)
        log.error(message)
        raise TurbiniaException(message)
    return os.path.join('gs://', self.bucket, destination_path)
Example 7
Project: python-docs-samples   Author: GoogleCloudPlatform   File: import_product_sets_test.py    License: Apache License 2.0 6 votes vote down vote up
def setup_teardown():
    # Create the product set csv file locally and upload it to GCS
    # This is so that there is a unique product set ID for all python version
    # tests.
    client = storage.Client(project=PROJECT_ID)
    bucket = client.get_bucket(PROJECT_ID)
    blob = storage.Blob("vision/{}.csv".format(FILENAME), bucket)
    csv_row = ''.join([
        '"gs://cloud-samples-data/vision/product_search/shoes_1.jpg",',
        '"{}",'.format(IMAGE_URI_1),
        '"{}",'.format(PRODUCT_SET_ID),
        '"{}",'.format(PRODUCT_ID_1),
        '"apparel",,"style=womens","0.1,0.1,0.9,0.1,0.9,0.9,0.1,0.9"',
    ])
    blob.upload_from_string(csv_row)

    yield

    # Tear down: remove the resources created by the test run.
    delete_product(PROJECT_ID, LOCATION, PRODUCT_ID_1)
    delete_product_set(PROJECT_ID, LOCATION, PRODUCT_SET_ID)
    # Delete the created file
    blob.delete(client)
Example 8
Project: python-storage   Author: googleapis   File: snippets.py    License: Apache License 2.0 6 votes vote down vote up
def delete_blob(to_delete):
    """Snippet: delete blobs from a bucket, tolerating missing objects.

    :param to_delete: list collecting resources for later cleanup; the
        bucket used here is appended to it.
    """
    # [START delete_blob]
    from google.cloud.exceptions import NotFound

    client = storage.Client()
    bucket = client.get_bucket("my-bucket")
    blobs = list(bucket.list_blobs())
    assert len(blobs) > 0
    # [<Blob: my-bucket, my-file.txt>]
    bucket.delete_blob("my-file.txt")
    try:
        bucket.delete_blob("doesnt-exist")
    except NotFound:
        # Deleting a nonexistent blob raises NotFound; this snippet
        # demonstrates catching and ignoring it.
        pass
    # [END delete_blob]

    blob = None
    # [START delete_blobs]
    # on_error swallows per-blob failures instead of aborting the batch.
    bucket.delete_blobs([blob], on_error=lambda blob: None)
    # [END delete_blobs]

    # Register the bucket for cleanup by the surrounding test harness.
    to_delete.append(bucket) 
Example 9
Project: healthcare-deid   Author: GoogleCloudPlatform   File: server.py    License: Apache License 2.0 6 votes vote down vote up
def verify_gcs_path(path):
  """Verifies that a GCS path exists.

  Args:
    path: A string that represents the target path.
  Returns:
    A boolean of the verification status.
  """
  client = storage.Client()
  file_name = gcsutil.GcsFileName.from_path(path)
  try:
    bucket = client.get_bucket(file_name.bucket)
  except exceptions.NotFound:
    # Missing bucket means the whole path cannot exist.
    return False
  blob = storage.Blob(bucket=bucket, name=file_name.blob)
  return blob.exists(client)
Example 10
Project: professional-services   Author: GoogleCloudPlatform   File: PrettyDataGenerator.py    License: Apache License 2.0 6 votes vote down vote up
def write_n_line_file_to_gcs(project, temp_location, n):
    """
    Write an n-line file to the temp_location in Google Cloud Storage.
    Args:
        project: A string containing the GCP project-id.
        temp_location: A string specifying a GCS location to write to.
        n: An integer specifying the number of lines to write to a file.
    Returns:
        The created google.cloud.storage blob.
    """
    # Split "gs://bucket/rest/of/path" into bucket and object prefix.
    bucket_name, path = temp_location.replace('gs://', '').split('/', 1)

    client = gcs.Client(project=project)
    bucket = client.get_bucket(bucket_name)
    # A uuid keeps concurrent runs from clobbering each other's file.
    blob = gcs.Blob(path + '/temp_num_records%s.txt' % uuid4(), bucket)

    # Write num_records newlines to a file_string. These will be our initial
    # PCollection elements. This proved more performant than beam.Create for
    # a large initial PCollection and takes advantage of distributed GCS reads.
    blob.upload_from_string('\n' * int(n))
    return blob
Example 11
Project: django-storages   Author: jschneier   File: gcloud.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def url(self, name):
    """
    Return a public URL or a signed URL for the Blob.
    This deliberately does not check for the existence of the Blob --
    doing so would be too slow for many use cases.
    """
    name = self._normalize_name(clean_name(name))
    blob = self.bucket.blob(name)
    is_public = self.default_acl == 'publicRead'

    if is_public and not self.custom_endpoint:
        return blob.public_url
    if is_public:
        # Public object served through a custom endpoint.
        return '{storage_base_url}/{quoted_name}'.format(
            storage_base_url=self.custom_endpoint,
            quoted_name=_quote(name, safe=b"/~"),
        )
    if not self.custom_endpoint:
        return blob.generate_signed_url(self.expiration)
    return blob.generate_signed_url(
        expiration=self.expiration,
        api_access_endpoint=self.custom_endpoint,
    )
Example 12
Project: neural-fingerprinting   Author: StephanZheng   File: cloud_client.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_blob(self, blob_name):
    """Gets google.cloud.storage.blob.Blob object by blob name."""
    bucket = self.bucket
    return bucket.get_blob(blob_name)
Example 13
Project: neural-fingerprinting   Author: StephanZheng   File: cloud_client.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def new_blob(self, blob_name):
    """Creates new storage blob with provided name."""
    blob = storage.Blob(blob_name, self.bucket)
    return blob
Example 14
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    License: Apache License 2.0 5 votes vote down vote up
def _compose_vcf_data_files(project, vcf_data_files_folder):
  # type: (str, str) -> storage.Blob
  """Composes multiple VCF data files to one VCF data file.

  Args:
    project: The project name.
    vcf_data_files_folder: The folder that contains all VCF data files.
  """
  bucket_name, blob_prefix = gcsio.parse_gcs_path(vcf_data_files_folder)
  composer = MultiProcessComposer(project, bucket_name, blob_prefix)
  return composer.get_composed_blob()
Example 15
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    License: Apache License 2.0 5 votes vote down vote up
def _create_blob(client, file_path):
  # type: (storage.Client, str) -> storage.Blob
  """Returns a text/plain blob handle for the given gs:// file path."""
  bucket_name, blob_name = gcsio.parse_gcs_path(file_path)
  blob = client.get_bucket(bucket_name).blob(blob_name)
  blob.content_type = 'text/plain'
  return blob
Example 16
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    License: Apache License 2.0 5 votes vote down vote up
def get_composed_blob(self):
    # type: () -> storage.Blob
    """Returns the final blob that all blobs composed to."""
    prefix = self._blob_prefix
    return self._compose_blobs_to_one(prefix)
Example 17
Project: loaner   Author: google   File: storage.py    License: Apache License 2.0 5 votes vote down vote up
def insert_blob(self, path, contents, bucket_name=None):
    """Inserts a new json encoded Blob in the Cloud Storage bucket provided.

    NOTE: If a Blob already exists at the provided path it will be overwritten
    by the new contents without warning.

    Args:
      path: str, the path of the Blob to create relative to the root of the
          Google Cloud Storage Bucket including the name of the Blob.
      contents: dict, a dictionary representing the contents of the new Blob.
      bucket_name: str, the name of the Google Cloud Storage Bucket to insert
          the new Blob into.
    """
    # Fall back to the configured default bucket when none is supplied.
    target_bucket = bucket_name or self._config.bucket

    blob = storage.Blob(name=path, bucket=self.get_bucket(target_bucket))
    blob.upload_from_string(
        data=json.dumps(contents),
        content_type='application/json',
        client=self._client,
    )

    logging.info(
        'Successfully uploaded blob %r to bucket %r.', path, target_bucket)
Example 18
Project: loaner   Author: google   File: storage.py    License: Apache License 2.0 5 votes vote down vote up
def get_blob(self, path, bucket_name=None):
    """Retrieves a json encoded Blob from Google Cloud Storage as a dictionary.

    Args:
      path: str, the path of the Blob to retrieve relative to the root of the
          Google Cloud Storage Bucket including the name of the Blob.
      bucket_name: str, the name of the Google Cloud Storage Bucket to retrieve
          the Blob from.

    Returns:
      A dictionary of the Blob from Google Cloud Storage.

    Raises:
      NotFoundError: when the path provided is not associated with a Blob in
          the Google Cloud Storage Bucket.
    """
    bucket_name = bucket_name or self._config.bucket
    blob = self.get_bucket(bucket_name).get_blob(path, self._client)

    try:
        raw = blob.download_as_string(self._client)
    except (AttributeError, exceptions.NotFound) as error:
        # AttributeError covers get_blob returning None for a missing path.
        logging.error(_GET_BLOB_ERROR_MSG, path, bucket_name, error)
        raise NotFoundError(_GET_BLOB_ERROR_MSG % (path, bucket_name, error))
    return json.loads(raw)
Example 19
Project: resolwe   Author: genialis   File: googleconnector.py    License: Apache License 2.0 5 votes vote down vote up
def exists(self, url):
    """Get if the object at the given URL exist."""
    blob = storage.Blob(bucket=self.bucket, name=os.fspath(url))
    return blob.exists()
Example 20
Project: lm-human-preferences   Author: openai   File: gcs.py    License: MIT License 5 votes vote down vote up
def upload_contents(url, contents, client=None):
    """Given a gs:// path, uploads ``contents`` to the corresponding blob.

    Args:
        url: gs:// URL identifying the destination blob.
        contents: str or bytes payload to write.
        client: optional storage.Client; a default client is created when
            omitted.
    """
    if client is None:
        client = storage.Client()
    bucket_name, path = parse_url(url)
    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(path, bucket)
    blob.upload_from_string(contents)
Example 21
Project: jgscm   Author: src-d   File: __init__.py    License: MIT License 5 votes vote down vote up
def get(self, path, content=True, type=None, format=None):
    """Returns the Jupyter content model for ``path`` (file or directory)."""
    if isinstance(path, Blob):
        obj = path
        path = self._get_blob_path(obj)
    elif path.startswith("/"):
        path = path[1:]
    path = path or self.default_path

    type = self._resolve_storagetype(path, type)
    if type != "directory":
        exists, blob = self._fetch(path)
        if not exists:
            raise web.HTTPError(404, u"No such file: %s" % path)
        if type == "notebook" or (type is None and path.endswith(".ipynb")):
            return self._notebook_model(blob, content=content)
        return self._file_model(blob, content=content, format=format)

    # Directory listings always use a trailing-slash prefix.
    if path and not path.endswith("/"):
        path += "/"
    exists, members = self._fetch(path, content=content)
    if not exists:
        raise web.HTTPError(404, u"No such directory: %s" % path)
    return self._dir_model(path, members, content=content)
Example 22
Project: jgscm   Author: src-d   File: __init__.py    License: MIT License 5 votes vote down vote up
def _get_blob_path(blob):
        """
        Gets blob path.
        :param blob: instance of :class:`google.cloud.storage.Blob`.
        :return: path string.
        """
        return blob.bucket.name + "/" + blob.name 
Example 23
Project: jgscm   Author: src-d   File: __init__.py    License: MIT License 5 votes vote down vote up
def _read_notebook(self, blob):
    """
    Reads a notebook file from a GCS blob.
    :param blob: :class:`google.cloud.storage.Blob` instance.
    :return: :class:`nbformat.notebooknode.NotebookNode` instance.
    """
    raw = blob.download_as_string().decode("utf-8")
    notebook = nbformat.reads(raw, as_version=4)
    # Trust cells so previously-saved output renders without warnings.
    self.mark_trusted_cells(notebook, self._get_blob_path(blob))
    return notebook
Example 24
Project: jgscm   Author: src-d   File: __init__.py    License: MIT License 5 votes vote down vote up
def _save_file(self, path, content, format):
    """Uploads content of a generic file to GCS.
    :param: path blob path.
    :param: content file contents string.
    :param: format the description of the input format, can be either
            "text" or "base64".
    :return: created :class:`google.cloud.storage.Blob`.
    """
    bucket_name, bucket_path = self._parse_path(path)
    bucket = self._get_bucket(bucket_name, throw=True)

    if format != "text" and format != "base64":
        raise web.HTTPError(
            400,
            u"Must specify format of file contents as \"text\" or "
            u"\"base64\"",
        )
    try:
        if format == "text":
            payload = content.encode("utf8")
        else:
            payload = base64.decodebytes(content.encode("ascii"))
    except Exception as e:
        raise web.HTTPError(
            400, u"Encoding error saving %s: %s" % (path, e)
        )
    blob = bucket.blob(bucket_path)
    blob.upload_from_string(payload)
    return blob
Example 25
Project: tensorboardX   Author: lanpa   File: record_writer.py    License: MIT License 5 votes vote down vote up
def __init__(self, path):
    """Prepares an in-memory buffer and a GCS blob handle for ``path``."""
    if not GCS_ENABLED:
        raise ImportError("`google-cloud-storage` must be installed in order to use "
                          "the 'gs://' protocol")

    self.path = path
    self.buffer = io.BytesIO()

    client = storage.Client()
    bucket_name, filepath = self.bucket_and_path()
    self.blob = storage.Blob(filepath, storage.Bucket(client, bucket_name))
Example 26
Project: tensorboardX   Author: lanpa   File: record_writer.py    License: MIT License 5 votes vote down vote up
def __init__(self, path):
    """Sets up buffered writing to a GCS object identified by ``path``."""
    if not GCS_ENABLED:
        raise ImportError("`google-cloud-storage` must be installed in order to use "
                          "the 'gs://' protocol")

    self.path = path
    self.buffer = io.BytesIO()

    # Resolve the target bucket/object and keep a blob handle around.
    gcs_client = storage.Client()
    bucket_name, blob_path = self.bucket_and_path()
    target_bucket = storage.Bucket(gcs_client, bucket_name)
    self.blob = storage.Blob(blob_path, target_bucket)
Example 27
Project: airflow   Author: apache   File: test_gcs.py    License: Apache License 2.0 5 votes vote down vote up
def test_copy(self, mock_service, mock_bucket):
        """Verifies GCSHook.copy delegates to Bucket.copy_blob correctly."""
        source_bucket = 'test-source-bucket'
        source_object = 'test-source-object'
        destination_bucket = 'test-dest-bucket'
        destination_object = 'test-dest-object'

        destination_bucket_instance = mock_bucket
        source_blob = mock_bucket.blob(source_object)
        destination_blob = storage.Blob(
            bucket=destination_bucket_instance,
            name=destination_object)

        # Given
        # Wire the mocked client: bucket() returns our mock bucket, and
        # copy_blob() yields the pre-built destination blob.
        bucket_mock = mock_service.return_value.bucket
        bucket_mock.return_value = mock_bucket
        copy_method = bucket_mock.return_value.copy_blob
        copy_method.return_value = destination_blob

        # When
        response = self.gcs_hook.copy(  # pylint: disable=assignment-from-no-return
            source_bucket=source_bucket,
            source_object=source_object,
            destination_bucket=destination_bucket,
            destination_object=destination_object
        )

        # Then
        # copy() returns nothing; the real work is the copy_blob delegation.
        self.assertEqual(response, None)
        copy_method.assert_called_once_with(
            blob=source_blob,
            destination_bucket=destination_bucket_instance,
            new_name=destination_object
        ) 
Example 28
Project: airflow   Author: apache   File: test_gcs.py    License: Apache License 2.0 5 votes vote down vote up
def test_delete(self, mock_service, mock_bucket):
    """Verifies GCSHook.delete resolves and deletes the named object."""
    test_bucket = 'test_bucket'
    test_object = 'test_object'
    blob_to_be_deleted = storage.Blob(name=test_object, bucket=mock_bucket)

    # Wire the mocked client so get_bucket().get_blob().delete() resolves.
    get_blob_method = mock_service.return_value.get_bucket.return_value.get_blob
    get_blob_method.return_value.delete.return_value = blob_to_be_deleted

    response = self.gcs_hook.delete(  # pylint: disable=assignment-from-no-return
        bucket_name=test_bucket,
        object_name=test_object)
    self.assertIsNone(response)
Example 29
Project: airflow   Author: apache   File: gcs.py    License: Apache License 2.0 5 votes vote down vote up
def delete(self, bucket_name, object_name):
    """
    Deletes an object from the bucket.

    :param bucket_name: name of the bucket, where the object resides
    :type bucket_name: str
    :param object_name: name of the object to delete
    :type object_name: str
    """
    client = self.get_conn()
    blob = client.bucket(bucket_name).blob(blob_name=object_name)
    blob.delete()
    self.log.info('Blob %s deleted.', object_name)
Example 30
Project: airflow   Author: apache   File: gcs.py    License: Apache License 2.0 5 votes vote down vote up
def _calculate_sync_destination_path(
        self,
        blob: storage.Blob,
        destination_object: Optional[str],
        source_object_prefix_len: int
    ) -> str:
        return (
            path.join(destination_object, blob.name[source_object_prefix_len:])
            if destination_object
            else blob.name[source_object_prefix_len:]
        )