Python google.cloud.storage.Client() Examples

The following are 29 code examples showing how to use google.cloud.storage.Client(). They are extracted from open-source projects; the project, author, source file, and license are listed above each example.

You may also want to check out all available functions and classes of the module google.cloud.storage.
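
Before the examples, here is a minimal sketch of the pattern most of them share. The bucket and object names are placeholders:

from google.cloud import storage

client = storage.Client()                 # uses Application Default Credentials
bucket = client.get_bucket('my-bucket')   # raises NotFound if the bucket is absent
blob = bucket.blob('path/to/object.txt')
blob.upload_from_string('hello, world')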

Example 1
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    License: Apache License 2.0
def _compose_files(project, bucket_name, blob_names, composite_name):
  # type: (str, str, List[str], str) -> None
  """Composes multiple files (up to 32 objects) in GCS to one.

  Args:
    project: The project name.
    bucket_name: The name of the bucket where the `components` and the new
      composite are saved.
    blob_names: A list of blob object names.
    composite_name: Name of the new composite.
  """
  bucket = storage.Client(project).get_bucket(bucket_name)
  output_file_blob = bucket.blob(composite_name)
  output_file_blob.content_type = 'text/plain'
  blobs = [bucket.get_blob(blob_name) for blob_name in blob_names]
  output_file_blob.compose(blobs) 
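
A hypothetical call, assuming shard objects shards/part-0 and shards/part-1 already exist in my-bucket (all names are placeholders):

_compose_files(
    project='my-project',
    bucket_name='my-bucket',
    blob_names=['shards/part-0', 'shards/part-1'],  # GCS compose accepts at most 32
    composite_name='merged.txt',
)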
Example 2
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    License: Apache License 2.0
def __init__(self, project, bucket_name, blob_prefix):
    # type: (str, str, str) -> None
    """Initializes a `MultiProcessComposer`.

    This class composes all blobs that start with `blob_prefix` to one.

    Args:
      project: The project name.
      bucket_name: The name of the bucket where the blob components and the new
        composite are saved.
      blob_prefix: The prefix used to filter blobs. Only the blobs with this
        prefix will be composed.
    """
    self._project = project
    self._bucket_name = bucket_name
    self._blob_prefix = blob_prefix
    self._bucket = storage.Client(project).get_bucket(bucket_name) 
Example 3
Project: loaner   Author: google   File: storage.py    License: Apache License 2.0
def from_config(cls, config, creds=None):
    """Returns an initialized CloudStorageAPI object.

    Args:
      config: common.ProjectConfig, the project configuration.
      creds: auth.CloudCredentials, the credentials to use for client
          authentication.

    Returns:
      An authenticated CloudStorageAPI instance.
    """
    if creds is None:
      creds = auth.CloudCredentials(config, cls.SCOPES)
    client = storage.Client(
        project=config.project, credentials=creds.get_credentials(cls.SCOPES))
    return cls(config, client) 
Example 4
Project: single_cell_portal   Author: broadinstitute   File: manage_study.py    License: BSD 3-Clause "New" or "Revised" License
def download_from_bucket(file_path):
    """Downloads file from Google Cloud Storage bucket"""

    path_segments = file_path[5:].split("/")

    storage_client = storage.Client()
    bucket_name = path_segments[0]
    bucket = storage_client.get_bucket(bucket_name)
    source = "/".join(path_segments[1:])

    blob = bucket.blob(source)
    destination = "/tmp/" + source.replace("/", "%2f")
    blob.download_to_filename(destination)
    print(f"{file_path} downloaded to {destination}.")

    return destination 
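
The file_path[5:] slice assumes a "gs://" prefix. A hypothetical call (bucket and path are placeholders):

local_copy = download_from_bucket('gs://my-bucket/data/matrix.tsv')
# prints: gs://my-bucket/data/matrix.tsv downloaded to /tmp/data%2fmatrix.tsv.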
Example 5
Project: lookml-tools   Author: ww-tech   File: bq_writer.py    License: Apache License 2.0
def _upload_to_gcs(self, gcs_project_id, target_bucket_name, bucket_folder, filename):
        '''upload CSV to file in GCS

        Args:
            gcs_project_id (str): project name
            target_bucket_name (str): name of GCS bucket
            bucket_folder (str): name of GCS folder
            filename (str): filepath to upload

        Returns:
            nothing. Side effect is that data is uploaded to GCS

        '''
        storage_client = storage.Client(gcs_project_id)
        bucket = storage_client.get_bucket(target_bucket_name)
        path = bucket_folder + '/' + filename  # GCS object names use '/', not os.sep
        logging.info("Loading to GCS: %s", path)
        blob = bucket.blob(path) #name in GCS
        blob.upload_from_filename(filename) 
Example 6
Project: koku   Author: project-koku   File: provider.py    License: GNU Affero General Public License v3.0
def cost_usage_source_is_reachable(self, credential_name, data_source):
        """
        Verify that the GCP bucket exists and is reachable.

        Args:
            credential_name (object): not used; only present for interface compatibility
            data_source (dict): dict containing name of GCP storage bucket

        """
        storage_client = storage.Client()
        bucket = data_source["bucket"]
        try:
            bucket_info = storage_client.lookup_bucket(bucket)
            if not bucket_info:
                # if the lookup does not return anything, then this is a nonexistent bucket
                key = "billing_source.bucket"
                message = f"The provided GCP bucket {bucket} does not exist"
                raise serializers.ValidationError(error_obj(key, message))

        except GoogleCloudError as e:
            key = "billing_source.bucket"
            raise serializers.ValidationError(error_obj(key, e.message))

        return True 
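
The key detail is that lookup_bucket() returns None for a missing bucket instead of raising, unlike get_bucket(). A minimal standalone sketch of the same check (bucket name is a placeholder):

from google.cloud import storage

client = storage.Client()
if client.lookup_bucket('my-billing-bucket') is None:
    print('bucket does not exist or is not visible to these credentials')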
Example 7
Project: koku   Author: project-koku   File: gcp_report_downloader.py    License: GNU Affero General Public License v3.0
def __init__(self, task, customer_name, billing_source, **kwargs):
        """
        Constructor.

        Args:
            task           (Object) bound celery object
            customer_name  (str): Name of the customer
            billing_source (dict): dict containing name of GCP storage bucket

        """
        super().__init__(task, **kwargs)

        self.bucket_name = billing_source["bucket"]
        self.report_prefix = billing_source.get("report_prefix", "")
        self.customer_name = customer_name.replace(" ", "_")
        self._provider_uuid = kwargs.get("provider_uuid")

        try:
            GCPProvider().cost_usage_source_is_reachable(None, billing_source)
            self._storage_client = storage.Client()
            self._bucket_info = self._storage_client.lookup_bucket(self.bucket_name)
        except ValidationError as ex:
            msg = f"GCP bucket {self.bucket_name} for customer {customer_name} is not reachable. Error: {str(ex)}"
            LOG.error(log_json(self.request_id, msg, self.context))
            raise GCPReportDownloaderError(str(ex)) 
Example 8
Project: vimss   Author: Veleslavia   File: Utils.py    License: GNU General Public License v3.0
def upload_to_gcs(filenames, gcs_bucket_path):
    """Upload wave file to GCS, at provided path."""

    path_parts = gcs_bucket_path[5:].split('/', 1)
    bucket_name = path_parts[0]
    if len(path_parts) == 1:
        key_prefix = ''
    elif path_parts[1].endswith('/'):
        key_prefix = path_parts[1]
    else:
        key_prefix = path_parts[1] + '/'

    client = storage.Client(project=os.environ["PROJECT_NAME"])
    bucket = client.get_bucket(bucket_name)

    def _upload_files(filenames):
        """Upload a list of files into a specifc subdirectory."""
        for i, filename in enumerate(filenames):
            blob = bucket.blob(key_prefix + os.path.basename(filename))
            blob.upload_from_filename(filename)
            if not i % 5:
                tf.logging.info('Finished uploading file: %s' % filename)

    _upload_files(filenames) 
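
A hypothetical call; the function reads the project ID from the PROJECT_NAME environment variable and strips the "gs://" prefix with the [5:] slice (all names are placeholders):

upload_to_gcs(['mix.wav', 'vocals.wav'],          # local files to upload
              'gs://my-bucket/experiments/run1')  # destination prefix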
Example 9
Project: garage   Author: rlworkgroup   File: helper.py    License: MIT License
def _upload_to_gcp_storage(exec_dir):
    """Upload all files to GCP storage under exec_dir folder.

    Args:
        exec_dir (str): The execution directory.

    """
    _bucket = storage.Client().bucket('resl-garage-benchmarks')
    exec_name = os.path.basename(exec_dir)

    for folder_name in os.listdir(exec_dir):
        folder_path = os.path.join(exec_dir, folder_name)
        if not os.path.isfile(folder_path):
            remote_folder = os.path.join(exec_name, folder_name)

            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                if os.path.isfile(file_path):
                    blob = _bucket.blob(os.path.join(remote_folder, file_name))
                    blob.upload_from_filename(file_path) 
Example 10
Project: docker-python   Author: Kaggle   File: kaggle_gcp.py    License: Apache License 2.0
def init_gcs():
    is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
    from google.cloud import storage
    if not is_user_secrets_token_set:
        return storage

    from kaggle_gcp import get_integrations
    if not get_integrations().has_gcs():
        return storage

    from kaggle_secrets import GcpTarget
    from kaggle_gcp import KaggleKernelCredentials
    monkeypatch_client(
        storage.Client,
        KaggleKernelCredentials(target=GcpTarget.GCS))
    return storage 
Example 11
Project: tensorboardX   Author: lanpa   File: embedding.py    License: MIT License
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]

        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path
        if prefix == 's3':
            import boto3
            s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)

        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()

            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path) 
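
The [len("s3://"):] slice handles both schemes only because "s3://" and "gs://" have the same length. Note that the function re-derives the local source path as prefix + ':/' + bucket path, i.e. a gs:// URL is expected to exist on disk as gs:/bucket/... (single slash). A hypothetical call (names are placeholders):

# uploads the local file gs:/my-bucket/logs/events.out
# to gs://my-bucket/logs/events.out
maybe_upload_file('gs://my-bucket/logs/events.out')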
Example 12
Project: gpt2-ml   Author: imcaspar   File: prepare_data.py    License: Apache License 2.0
def __init__(self, fn):
        self.fn = fn
        if fn.startswith('gs://'):
            from google.cloud import storage
            self.s3client = None
            self.gclient = storage.Client()
            self.storage_dir = TemporaryDirectory()
            self.writer = tf.python_io.TFRecordWriter(
                os.path.join(self.storage_dir.name, 'temp.tfrecord'))
            self.bucket_name, self.file_name = self.fn.split(
                'gs://', 1)[1].split('/', 1)

        else:
            self.s3client = None
            self.gclient = None
            self.bucket_name = None
            self.file_name = None
            self.storage_dir = None
            self.writer = tf.python_io.TFRecordWriter(fn) 
Example 13
Project: model_server   Author: openvinotoolkit   File: gs_model.py    License: Apache License 2.0
def gs_download_file(path):
        if path is None:
            return None
        parsed_path = urlparse(path)
        bucket_name = parsed_path.netloc
        file_path = parsed_path.path[1:]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
        except exceptions.DefaultCredentialsError:
            logger.info('Switching to anonymous google storage client')
            gs_client = storage.Client.create_anonymous_client()
            bucket = gs_client.bucket(bucket_name, user_project=None)
        blob = bucket.blob(file_path)
        tmp_path = os.path.join('/tmp', file_path.split('/')[-1])  # GCS paths use '/' on every OS
        blob.download_to_filename(tmp_path)
        return tmp_path 
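
A hypothetical call (path is a placeholder); when no usable credentials are found, the function falls back to an anonymous client, which works for publicly readable buckets:

local_model = gs_download_file('gs://my-bucket/models/model.xml')
# -> '/tmp/model.xml'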
Example 14
Project: model_server   Author: openvinotoolkit   File: predict.py    License: Apache License 2.0
def get_local_file(source_path):
    parsed_path = urlparse(source_path)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        file_path = parsed_path.path[1:]
        file_name = os.path.split(parsed_path.path)[1]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
        except exceptions.DefaultCredentialsError:
            # if credentials fails, try to connect as anonymous user
            gs_client = storage.Client.create_anonymous_client()
            bucket = gs_client.bucket(bucket_name, user_project=None)
        blob = bucket.blob(file_path)
        blob.download_to_filename(file_name)
    elif parsed_path.scheme == "":
        # in case of local path just pass the input argument
        if os.path.isfile(source_path):
            file_name = source_path
        else:
            print("file " + source_path + "is not accessible")
            file_name = ""
    return file_name 
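
Hypothetical calls (names are placeholders); note that a downloaded file lands in the current working directory under the blob's base name:

local = get_local_file('gs://my-bucket/images/cat.jpg')  # downloads ./cat.jpg
local = get_local_file('cat.jpg')                        # local path passes through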
Example 15
Project: model_server   Author: openvinotoolkit   File: predict.py    License: Apache License 2.0
def upload_file(source_file, target_folder):
    parsed_path = urlparse(target_folder)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        folder_path = parsed_path.path[1:]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
            blob = bucket.blob(folder_path + "/" + source_file)
            blob.upload_from_filename(source_file)
        except Exception as er:
            print(er)
            return False
    elif parsed_path.scheme == "":
        if target_folder != ".":
            copy(source_file, target_folder)
    return True 
Example 16
Project: cwavegan   Author: acheketa   File: backup.py    License: MIT License
def list_blobs(bucket_name):
    """Lists all the blobs in the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    blobs = bucket.list_blobs()
    return blobs 
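
Typical use is to iterate over the returned lazy iterator (bucket name is a placeholder):

for blob in list_blobs('my-bucket'):
    print(blob.name, blob.size)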
Example 17
Project: neural-fingerprinting   Author: StephanZheng   File: cloud_client.py    License: BSD 3-Clause "New" or "Revised" License
def __init__(self, project_id, bucket_name):
    """Initialize client with project id and name of the storage bucket."""
    self.project_id = project_id
    self.bucket_name = bucket_name
    self.client = storage.Client(project=project_id)
    self.bucket = self.client.get_bucket(bucket_name) 
Example 18
Project: neural-fingerprinting   Author: StephanZheng   File: cloud_client.py    License: BSD 3-Clause "New" or "Revised" License
def __init__(self, project_id, namespace=None):
    """Init this method with given project id and optional namespace."""
    self._client = datastore.Client(project=project_id, namespace=namespace) 
Example 19
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    License: Apache License 2.0
def compose_gcs_vcf_shards(project,  # type: str
                           vcf_header_file_path,  # type: str
                           vcf_data_files_folder,  # type: str
                           output_file,  # type: str
                           delete=False,  # type: bool
                          ):
  # type: (...) -> None
  """Composes VCF shards in GCS to one VCF file.

  It composes VCF header and VCF data files to one VCF file, and deletes the
  original VCF shards if `delete` is True.

  Args:
    project: The project name.
    vcf_header_file_path: The path of the VCF header file, it contains the meta
      information, as well as the data header line with the sample names.
    vcf_data_files_folder: The folder that contains all VCF data files.
    output_file: The final VCF file path.
    delete: If true, delete the original VCF shards.
  """
  header_bucket_name, header_blob = gcsio.parse_gcs_path(vcf_header_file_path)
  vcf_data_bucket_name, vcf_data_blob_prefix = gcsio.parse_gcs_path(
      vcf_data_files_folder)

  if vcf_data_bucket_name != header_bucket_name:
    raise ValueError('The VCF data files {} and header file {} are in '
                     'different buckets. '.format(vcf_data_files_folder,
                                                  vcf_header_file_path))

  composed_vcf_data_blob = _compose_vcf_data_files(project,
                                                   vcf_data_files_folder)
  client = storage.Client(project)
  bucket = client.get_bucket(vcf_data_bucket_name)
  output_file_blob = _create_blob(client, output_file)
  output_file_blob.compose([bucket.get_blob(header_blob),
                            composed_vcf_data_blob])
  if delete:
    bucket.delete_blobs(bucket.list_blobs(prefix=vcf_data_blob_prefix))
    bucket.delete_blobs(bucket.list_blobs(prefix=header_blob)) 
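
A hypothetical invocation, assuming the header and data shards live in the same bucket (paths are placeholders; gcsio is Apache Beam's GCS path helper used above):

compose_gcs_vcf_shards('my-project',
                       'gs://my-bucket/header/merged_header.vcf',
                       'gs://my-bucket/shards/',
                       'gs://my-bucket/merged.vcf',
                       delete=False)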
Example 20
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    License: Apache License 2.0
def _create_blob(client, file_path):
  # type: (storage.Client, str) -> storage.Blob
  bucket_name, blob_name = gcsio.parse_gcs_path(file_path)
  file_blob = client.get_bucket(bucket_name).blob(blob_name)
  file_blob.content_type = 'text/plain'
  return file_blob 
Example 21
Project: gcp-variant-transforms   Author: googlegenomics   File: run_preprocessor_tests.py    License: Apache License 2.0
def validate_result(self):
    """Validates the results.

    - Checks that the report is generated.
    - Validates report's contents are the same as `expected_contents`.
    - Checks that the resolved headers are generated if `header_blob_name` is
      specified in the test.
    """
    client = storage.Client(self._project)
    bucket = client.get_bucket(_BUCKET_NAME)
    report_blob = bucket.get_blob(self._report_blob_name)
    if not report_blob:
      raise run_tests_common.TestCaseFailure(
          'Report is not generated in {} in test {}'.format(self._report_path,
                                                            self._name))
    contents = report_blob.download_as_string()  # note: bytes on Python 3; decode before comparing
    expected_contents = '\n'.join(self._expected_contents)
    if expected_contents != contents:
      raise run_tests_common.TestCaseFailure(
          'Contents mismatch: expected {}, got {} in test {}'.format(
              expected_contents, contents, self._name))
    if not self._keep_reports:
      report_blob.delete()

    if self._header_blob_name:
      resolved_headers_blob = bucket.get_blob(self._header_blob_name)
      if not resolved_headers_blob:
        raise run_tests_common.TestCaseFailure(
            'The resolved header is not generated in {} in test {}'.format(
                self._header_path, self._name))
      if not self._keep_reports:
        resolved_headers_blob.delete() 
Example 22
Project: grover   Author: rowanz   File: prepare_lm_data.py    License: Apache License 2.0
def __init__(self, fn):
        self.fn = fn
        if fn.startswith('s3://'):
            from boto3.s3.transfer import TransferConfig
            import boto3
            self.gclient = None
            self.s3client = boto3.client('s3')
            self.storage_dir = TemporaryDirectory()
            self.writer = tf.python_io.TFRecordWriter(os.path.join(self.storage_dir.name, 'temp.tfrecord'))
            self.bucket_name, self.file_name = self.fn.split('s3://', 1)[1].split('/', 1)
        elif fn.startswith('gs://'):
            from google.cloud import storage
            self.s3client = None
            self.gclient = storage.Client()
            self.storage_dir = TemporaryDirectory()
            self.writer = tf.python_io.TFRecordWriter(os.path.join(self.storage_dir.name, 'temp.tfrecord'))
            self.bucket_name, self.file_name = self.fn.split('gs://', 1)[1].split('/', 1)

        else:
            self.s3client = None
            self.gclient = None
            self.bucket_name = None
            self.file_name = None
            self.storage_dir = None
            self.writer = tf.python_io.TFRecordWriter(fn) 
Example 23
Project: grover   Author: rowanz   File: validate.py    License: Apache License 2.0
def __init__(self, gcloud_name):
        assert gcloud_name.startswith('gs://')
        self.gcloud_name = gcloud_name
        bucket_name, blob_name = gcloud_name.split('gs://')[1].split('/', 1)
        bucket = storage.Client().get_bucket(bucket_name)
        self.blob = bucket.blob(blob_name) 
Example 24
Project: cloudml-edge-automation   Author: GoogleCloudPlatform   File: mark_done.py    License: Apache License 2.0
def mark_done(gspath):
    """Uploads a file to the bucket to indicate comletion of training job.
    gspath is a path to the output directory of training such as

    gs://$PROJECT-model-output/$MODEL_NAME/$MODEL_VERSION/output

    """
    url = urlparse(gspath)
    if url.scheme != "gs":
        raise RuntimeError("not a Google Storage URL")
    bucket_name = url.netloc
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(url.path.strip("/") + "/TRAINER-DONE")
    blob.upload_from_string("done") 
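
A hypothetical call matching the docstring's layout:

mark_done('gs://my-project-model-output/my_model/v1/output')
# creates gs://my-project-model-output/my_model/v1/output/TRAINER-DONE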
Example 25
Project: icrawler   Author: hellock   File: google_storage.py    License: MIT License
def __init__(self, root_dir):
        try:
            from google.cloud import storage
        except ImportError:
            print('GoogleStorage backend requires the package '
                  '"google-cloud-storage", execute '
                  '"pip install google-cloud-storage" to install it.')

        self.client = storage.Client()
        bucket_str = root_dir[5:].split('/')[0]
        self.bucket = self.client.get_bucket(bucket_str)
        self.folder_str = root_dir[6 + len(bucket_str):]
        if self.folder_str[0] == '/':
            self.folder_str = self.folder_str[1:] 
Example 26
Project: lm-human-preferences   Author: openai   File: gcs.py    License: MIT License
def get_blob(url, client=None):
    if client is None:
        client = storage.Client()
    bucket_name, path = parse_url(url)
    bucket = client.get_bucket(bucket_name)
    return bucket.get_blob(path) 
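
A hypothetical call (parse_url is this module's own gs:// URL splitter); passing a shared client avoids re-authenticating on every call:

client = storage.Client()
blob = get_blob('gs://my-bucket/policies/latest.ckpt', client=client)
if blob is not None:  # bucket.get_blob() returns None for a missing object
    data = blob.download_as_string()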
Example 27
Project: lm-human-preferences   Author: openai   File: gcs.py    License: MIT License
def upload_contents(url, contents, client=None):
    """Given a gs:// path, returns contents of the corresponding blob."""
    if client is None:
        client = storage.Client()
    bucket_name, path = parse_url(url)
    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(path, bucket)
    blob.upload_from_string(contents) 
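
A hypothetical call (URL is a placeholder):

upload_contents('gs://my-bucket/runs/status.txt', 'done')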
Example 28
Project: cloudml-samples   Author: GoogleCloudPlatform   File: data_utils.py    License: Apache License 2.0
def download_data():
    """Download the data from Google Cloud Storage"""
    # Load the Dataset from the public GCS bucket
    bucket = storage.Client().bucket('cloud-samples-data')
    # Path to the data inside the public bucket
    blob = bucket.blob('ml-engine/sonar/sonar.all-data')
    # Download the data
    blob.download_to_filename('sonar.all-data') 
Example 29
Project: cloudml-samples   Author: GoogleCloudPlatform   File: data_utils.py    License: Apache License 2.0
def save_model(model_dir, model_name):
    """Saves the model to Google Cloud Storage"""
    bucket = storage.Client().bucket(model_dir)
    blob = bucket.blob('{}/{}'.format(
        datetime.datetime.now().strftime('sonar_%Y%m%d_%H%M%S'),
        model_name))
    blob.upload_from_filename(model_name)
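
A hypothetical call, assuming the bucket exists and sonar_model.h5 was just written locally (both names are placeholders):

save_model('my-model-bucket', 'sonar_model.h5')
# uploads to gs://my-model-bucket/sonar_<timestamp>/sonar_model.h5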