Python google.cloud.storage.Client() Examples

The following are 27 code examples of google.cloud.storage.Client(), collected from open-source projects. Each example notes the project and source file it was taken from, along with that project's license. You may also want to look at the other functions and classes available in the google.cloud.storage module.
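Most of the examples follow the same basic pattern: create a client, resolve a bucket, then upload or download blobs. As a quick orientation, here is a minimal sketch of that pattern (the project, bucket, and file names are placeholders, not taken from any of the projects below):

from google.cloud import storage

# Uses Application Default Credentials; a project can also be passed
# explicitly, e.g. storage.Client(project="my-project").
client = storage.Client()

# get_bucket() makes an API call and raises google.cloud.exceptions.NotFound
# if the bucket does not exist.
bucket = client.get_bucket("my-example-bucket")

# Upload a local file to an object, then download it back.
blob = bucket.blob("folder/example.txt")
blob.upload_from_filename("example.txt")
blob.download_to_filename("/tmp/example.txt")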
Example #1
Source File: bq_writer.py    From lookml-tools with Apache License 2.0
def _upload_to_gcs(self, gcs_project_id, target_bucket_name, bucket_folder, filename):
        '''upload CSV to file in GCS

        Args:
            gcs_project_id (str): project name
            target_bucket_name (str): name of GCS bucket
            bucket_folder (str): name of GCS folder
            filename (str): filepath to upload

        Returns:
            nothing. Side effect is that data is uploaded to GCS

        '''
        storage_client = storage.Client(gcs_project_id)
        bucket = storage_client.get_bucket(target_bucket_name)
        path = bucket_folder + os.sep + filename
        logging.info("Loading to GCS: %s", path)
        blob = bucket.blob(path) #name in GCS
        blob.upload_from_filename(filename) 
Example #2
Source File: manage_study.py    From single_cell_portal with BSD 3-Clause "New" or "Revised" License
def download_from_bucket(file_path):
    """Downloads file from Google Cloud Storage bucket"""

    path_segments = file_path[5:].split("/")

    storage_client = storage.Client()
    bucket_name = path_segments[0]
    bucket = storage_client.get_bucket(bucket_name)
    source = "/".join(path_segments[1:])

    blob = bucket.blob(source)
    destination = "/tmp/" + source.replace("/", "%2f")
    blob.download_to_filename(destination)
    print(f"{file_path} downloaded to {destination}.")

    return destination 
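This helper assumes a full gs:// URI; the [5:] slice strips the "gs://" scheme before splitting the bucket name from the object path. A hypothetical call (the bucket and object names are placeholders):

# Downloads gs://my-bucket/metadata/cells.tsv to /tmp/metadata%2fcells.tsv
local_path = download_from_bucket("gs://my-bucket/metadata/cells.tsv")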
Example #3
Source File: prepare_data.py    From gpt2-ml with Apache License 2.0
def __init__(self, fn):
        self.fn = fn
        if fn.startswith('gs://'):
            from google.cloud import storage
            self.s3client = None
            self.gclient = storage.Client()
            self.storage_dir = TemporaryDirectory()
            self.writer = tf.python_io.TFRecordWriter(
                os.path.join(self.storage_dir.name, 'temp.tfrecord'))
            self.bucket_name, self.file_name = self.fn.split(
                'gs://', 1)[1].split('/', 1)

        else:
            self.s3client = None
            self.gclient = None
            self.bucket_name = None
            self.file_name = None
            self.storage_dir = None
            self.writer = tf.python_io.TFRecordWriter(fn) 
Example #4
Source File: gs_model.py    From model_server with Apache License 2.0
def gs_download_file(path):
        if path is None:
            return None
        parsed_path = urlparse(path)
        bucket_name = parsed_path.netloc
        file_path = parsed_path.path[1:]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
        except exceptions.DefaultCredentialsError:
            logger.info('Switching to anonymous google storage client')
            gs_client = storage.Client.create_anonymous_client()
            bucket = gs_client.bucket(bucket_name, user_project=None)
        blob = bucket.blob(file_path)
        tmp_path = os.path.join('/tmp', file_path.split(os.sep)[-1])
        blob.download_to_filename(tmp_path)
        return tmp_path 
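The except branch above falls back to an anonymous client when Application Default Credentials are not configured, which only works if the bucket is publicly readable. A standalone sketch of that fallback, assuming a hypothetical public bucket:

from google.cloud import storage

# Anonymous clients carry no credentials and can only read public objects.
client = storage.Client.create_anonymous_client()
bucket = client.bucket("some-public-bucket")  # no API call is made here
blob = bucket.blob("models/model.bin")
blob.download_to_filename("/tmp/model.bin")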
Example #5
Source File: predict.py    From model_server with Apache License 2.0
def get_local_file(source_path):
    parsed_path = urlparse(source_path)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        file_path = parsed_path.path[1:]
        file_name = os.path.split(parsed_path.path)[1]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
        except exceptions.DefaultCredentialsError:
            # if credentials fails, try to connect as anonymous user
            gs_client = storage.Client.create_anonymous_client()
            bucket = gs_client.bucket(bucket_name, user_project=None)
        blob = bucket.blob(file_path)
        blob.download_to_filename(file_name)
    elif parsed_path.scheme == "":
        # in case of local path just pass the input argument
        if os.path.isfile(source_path):
            file_name = source_path
        else:
            print("file " + source_path + "is not accessible")
            file_name = ""
    return file_name 
Example #6
Source File: predict.py    From model_server with Apache License 2.0
def upload_file(source_file, target_folder):
    parsed_path = urlparse(target_folder)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        folder_path = parsed_path.path[1:]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
            blob = bucket.blob(folder_path + "/" + source_file)
            blob.upload_from_filename(source_file)
        except Exception as er:
            print(er)
            return False
    elif parsed_path.scheme == "":
        if target_folder != ".":
            copy(source_file, target_folder)
    return True 
Example #7
Source File: helper.py    From garage with MIT License
def _upload_to_gcp_storage(exec_dir):
    """Upload all files to GCP storage under exec_dir folder.

    Args:
        exec_dir (str): The execution directory.

    """
    _bucket = storage.Client().bucket('resl-garage-benchmarks')
    exec_name = os.path.basename(exec_dir)

    for folder_name in os.listdir(exec_dir):
        folder_path = os.path.join(exec_dir, folder_name)
        if not os.path.isfile(folder_path):
            remote_folder = os.path.join(exec_name, folder_name)

            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                if os.path.isfile(file_path):
                    blob = _bucket.blob(os.path.join(remote_folder, file_name))
                    blob.upload_from_filename(file_path) 
Example #8
Source File: Utils.py    From vimss with GNU General Public License v3.0
def upload_to_gcs(filenames, gcs_bucket_path):
    """Upload wave file to GCS, at provided path."""

    path_parts = gcs_bucket_path[5:].split('/', 1)
    bucket_name = path_parts[0]
    if len(path_parts) == 1:
        key_prefix = ''
    elif path_parts[1].endswith('/'):
        key_prefix = path_parts[1]
    else:
        key_prefix = path_parts[1] + '/'

    client = storage.Client(project=os.environ["PROJECT_NAME"])
    bucket = client.get_bucket(bucket_name)

    def _upload_files(filenames):
        """Upload a list of files into a specifc subdirectory."""
        for i, filename in enumerate(filenames):
            blob = bucket.blob(key_prefix + os.path.basename(filename))
            blob.upload_from_filename(filename)
            if not i % 5:
                tf.logging.info('Finished uploading file: %s' % filename)

    _upload_files(filenames) 
Example #9
Source File: gcp_report_downloader.py    From koku with GNU Affero General Public License v3.0
def __init__(self, task, customer_name, billing_source, **kwargs):
        """
        Constructor.

        Args:
            task           (Object) bound celery object
            customer_name  (str): Name of the customer
            billing_source (dict): dict containing name of GCP storage bucket

        """
        super().__init__(task, **kwargs)

        self.bucket_name = billing_source["bucket"]
        self.report_prefix = billing_source.get("report_prefix", "")
        self.customer_name = customer_name.replace(" ", "_")
        self._provider_uuid = kwargs.get("provider_uuid")

        try:
            GCPProvider().cost_usage_source_is_reachable(None, billing_source)
            self._storage_client = storage.Client()
            self._bucket_info = self._storage_client.lookup_bucket(self.bucket_name)
        except ValidationError as ex:
            msg = f"GCP bucket {self.bucket_name} for customer {customer_name} is not reachable. Error: {str(ex)}"
            LOG.error(log_json(self.request_id, msg, self.context))
            raise GCPReportDownloaderError(str(ex)) 
Example #10
Source File: embedding.py    From tensorboardX with MIT License
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]

        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path
        if prefix == 's3':
            import boto3
            s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)

        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()

            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path) 
Example #11
Source File: storage.py    From loaner with Apache License 2.0
def from_config(cls, config, creds=None):
    """Returns an initialized CloudStorageAPI object.

    Args:
      config: common.ProjectConfig, the project configuration.
      creds: auth.CloudCredentials, the credentials to use for client
          authentication.

    Returns:
      An authenticated CloudStorageAPI instance.
    """
    if creds is None:
      creds = auth.CloudCredentials(config, cls.SCOPES)
    client = storage.Client(
        project=config.project, credentials=creds.get_credentials(cls.SCOPES))
    return cls(config, client) 
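For comparison, the same explicit-credentials pattern without the project's auth wrapper looks roughly like this (a sketch assuming a service-account key file; the path and project name are placeholders):

from google.cloud import storage
from google.oauth2 import service_account

# Hypothetical key file path.
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account.json")
client = storage.Client(project="my-project", credentials=credentials)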
Example #12
Source File: provider.py    From koku with GNU Affero General Public License v3.0
def cost_usage_source_is_reachable(self, credential_name, data_source):
        """
        Verify that the GCP bucket exists and is reachable.

        Args:
            credential_name (object): not used; only present for interface compatibility
            data_source (dict): dict containing name of GCP storage bucket

        """
        storage_client = storage.Client()
        bucket = data_source["bucket"]
        try:
            bucket_info = storage_client.lookup_bucket(bucket)
            if not bucket_info:
                # if the lookup does not return anything, then this is a nonexistent bucket
                key = "billing_source.bucket"
                message = f"The provided GCP bucket {bucket} does not exist"
                raise serializers.ValidationError(error_obj(key, message))

        except GoogleCloudError as e:
            key = "billing_source.bucket"
            raise serializers.ValidationError(error_obj(key, e.message))

        return True 
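Note the use of lookup_bucket() here: unlike get_bucket(), which raises NotFound for a missing bucket, lookup_bucket() returns None, so the code can branch on the result instead of catching an exception. A small sketch of the difference (the bucket name is a placeholder):

from google.cloud import storage
from google.cloud.exceptions import NotFound

client = storage.Client()

# lookup_bucket() returns None instead of raising when the bucket is missing.
if client.lookup_bucket("maybe-missing-bucket") is None:
    print("bucket does not exist")

# get_bucket() raises NotFound in the same situation.
try:
    client.get_bucket("maybe-missing-bucket")
except NotFound:
    print("bucket does not exist")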
Example #13
Source File: vcf_file_composer.py    From gcp-variant-transforms with Apache License 2.0
def __init__(self, project, bucket_name, blob_prefix):
    # type: (str, str, str) -> None
    """Initializes a `MultiProcessComposer`.

    This class composes all blobs that start with `blob_prefix` to one.

    Args:
      project: The project name.
      bucket_name: The name of the bucket where the blob components and the new
        composite are saved.
      blob_prefix: The prefix used to filter blobs. Only the blobs with this
        prefix will be composed.
    """
    self._project = project
    self._bucket_name = bucket_name
    self._blob_prefix = blob_prefix
    self._bucket = storage.Client(project).get_bucket(bucket_name) 
Example #14
Source File: vcf_file_composer.py    From gcp-variant-transforms with Apache License 2.0
def _compose_files(project, bucket_name, blob_names, composite_name):
  # type: (str, str, List[str], str) -> None
  """Composes multiple files (up to 32 objects) in GCS to one.

  Args:
    project: The project name.
    bucket_name: The name of the bucket where the `components` and the new
      composite are saved.
    blob_names: A list of blob object names.
    composite_name: Name of the new composite.
  """
  bucket = storage.Client(project).get_bucket(bucket_name)
  output_file_blob = bucket.blob(composite_name)
  output_file_blob.content_type = 'text/plain'
  blobs = [bucket.get_blob(blob_name) for blob_name in blob_names]
  output_file_blob.compose(blobs) 
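Because GCS compose() accepts at most 32 source objects per request, larger sets have to be merged in stages; see also the MultiProcessComposer in Example #13, which comes from the same source file. A hypothetical call with placeholder names:

_compose_files(
    project='my-project',
    bucket_name='my-bucket',
    blob_names=['shards/part-00000', 'shards/part-00001'],
    composite_name='merged/output.vcf')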
Example #15
Source File: kaggle_gcp.py    From docker-python with Apache License 2.0
def init_gcs():
    is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
    from google.cloud import storage
    if not is_user_secrets_token_set:
        return storage

    from kaggle_gcp import get_integrations
    if not get_integrations().has_gcs():
        return storage

    from kaggle_secrets import GcpTarget
    from kaggle_gcp import KaggleKernelCredentials
    monkeypatch_client(
        storage.Client,
        KaggleKernelCredentials(target=GcpTarget.GCS))
    return storage 
Example #16
Source File: storage.py    From InfraBox with Apache License 2.0
def _upload(self, stream, key):
        client = gcs.Client()
        bucket = client.get_bucket(self.bucket)
        blob = bucket.blob(key)
        blob.upload_from_file(stream) 
Example #17
Source File: storage.py    From InfraBox with Apache License 2.0
def exists(self, key):
        client = gcs.Client()
        bucket = client.get_bucket(self.bucket)
        blob = bucket.blob(key)
        return blob.exists() 
Example #18
Source File: storage.py    From InfraBox with Apache License 2.0
def _download(self, key):
        client = gcs.Client()
        bucket = client.get_bucket(self.bucket)
        blob = bucket.get_blob(key)

        if not blob:
            return None

        path = '/tmp/%s' % uuid.uuid4()
        with open(path, 'w+b') as f:  # binary mode; blob contents are downloaded as bytes
            blob.download_to_file(f)

        self._clean_up(path)

        return path 
Example #19
Source File: utils.py    From recsys2019 with Apache License 2.0
def upload_data(src_path, dst_path):
    client = storage.Client()
    bucket = client.get_bucket("logicai-recsys2019")
    blob = bucket.blob(dst_path)
    blob.upload_from_filename(src_path) 
Example #20
Source File: psi.py    From python-script with Apache License 2.0
def save(url, report):
    '''Save to https://console.cloud.google.com/storage/browser/[bucket-id]/'''
    client = storage.Client()
    bucket = client.get_bucket("psi-report")
    blob = Blob(f"${parse.quote_plus(url)}.json", bucket)
    blob.upload_from_string(report, "application/json") 
Example #21
Source File: utils.py    From recsys2019 with Apache License 2.0
def download_data(src_path, dst_path):
    client = storage.Client()
    bucket = client.get_bucket("logicai-recsys2019")
    blob = bucket.get_blob(src_path)
    blob.download_to_filename(dst_path) 
Example #22
Source File: imagenet_to_gcs.py    From training_results_v0.5 with Apache License 2.0
def upload_to_gcs(training_records, validation_records):
  """Upload TF-Record files to GCS, at provided path."""

  # Find the GCS bucket_name and key_prefix for dataset files
  path_parts = FLAGS.gcs_output_path[5:].split('/', 1)
  bucket_name = path_parts[0]
  if len(path_parts) == 1:
    key_prefix = ''
  elif path_parts[1].endswith('/'):
    key_prefix = path_parts[1]
  else:
    key_prefix = path_parts[1] + '/'

  client = storage.Client(project=FLAGS.project)
  bucket = client.get_bucket(bucket_name)

  def _upload_files(filenames):
    """Upload a list of files into a specifc subdirectory."""
    for i, filename in enumerate(sorted(filenames)):
      blob = bucket.blob(key_prefix + os.path.basename(filename))
      blob.upload_from_filename(filename)
      if not i % 20:
        tf.logging.info('Finished uploading file: %s' % filename)

  # Upload training dataset
  tf.logging.info('Uploading the training data.')
  _upload_files(training_records)

  # Upload validation dataset
  tf.logging.info('Uploading the validation data.')
  _upload_files(validation_records) 
Example #23
Source File: simple_eval.py    From training_results_v0.5 with Apache License 2.0
def run():
    """ Get the models from GCS and then have them play eachother. """
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_KEY_LOCATION)
    scoped_credentials = credentials.with_scopes(
        ['https://www.googleapis.com/auth/cloud-platform'])

    # Use the hand-crafted GCS client
    storage_client = storage.Client(credentials=credentials)
    bucket = storage_client.get_bucket(BUCKET_NAME)
    blobs = bucket.list_blobs(prefix=MODEL_DIR)

    models = []
    seen_models = set()
    model_reg = re.compile(r'\d{6}-\w+')
    for b in blobs:
        match = model_reg.search(b.name)
        if match and not match.group(0) in seen_models:
            seen_models.add(match.group(0))
            models.append(match.group(0))

    # Now that we have all the models, we can pit them against each other.
    # For now, just pick the last two.
    p1, p2 = None, None
    if len(models) == 0:
        sys.stderr.write('No models found!\n')
        sys.exit(1)
    elif len(models) == 1:
        p1, p2 = models[0], models[0]
    else:
        p1, p2 = models[-1], models[-2]

    play_matches(p1, p2) 
Example #24
Source File: gcs.py    From airflow with Apache License 2.0
def get_conn(self):
        """
        Returns a Google Cloud Storage service object.
        """
        if not self._conn:
            self._conn = storage.Client(credentials=self._get_credentials(),
                                        client_info=self.client_info,
                                        project=self.project_id)

        return self._conn 
Example #25
Source File: record_writer.py    From tensorboardX with MIT License
def __init__(self, path):
        if not GCS_ENABLED:
            raise ImportError("`google-cloud-storage` must be installed in order to use "
                              "the 'gs://' protocol")

        self.path = path
        self.buffer = io.BytesIO()

        client = storage.Client()
        bucket_name, filepath = self.bucket_and_path()
        bucket = storage.Bucket(client, bucket_name)
        self.blob = storage.Blob(filepath, bucket) 
Example #26
Source File: test_automl.py    From docker-python with Apache License 2.0
def test_tables_gcs_client(self):
        # The GcsClient can't currently be monkeypatched for default
        # credentials because it requires a project which can't be set.
        # Verify that creating an automl_v1beta1.GcsClient given an actual
        # storage.Client sets the client properly.
        gcs_client = storage.Client(project="xyz", credentials=_make_credentials())
        tables_gcs_client = automl_v1beta1.GcsClient(client=gcs_client)
        self.assertIs(tables_gcs_client.client, gcs_client) 
Example #27
Source File: test_gcs.py    From docker-python with Apache License 2.0
def test_default_credentials_gcs_enabled(self):
        env = EnvironmentVarGuard()
        env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
        env.set('KAGGLE_KERNEL_INTEGRATIONS', 'GCS')
        with env:
            init_gcs()
            client = storage.Client(project="xyz")
            self.assertIsInstance(client._credentials, KaggleKernelCredentials)
            self.assertTrue(client._connection.user_agent.startswith("kaggle-gcp-client/1.0"))