Python google.cloud.storage.Client() Examples
The following are 30 code examples of google.cloud.storage.Client(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module google.cloud.storage, or try the search function.
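All of the examples below follow the same basic pattern: construct a storage.Client, look up a bucket, and then upload or download blobs. As a quick orientation, here is a minimal sketch of that pattern; the project ID, bucket name, and file paths are placeholders rather than values taken from any of the projects listed.

# Minimal sketch of the pattern shared by the examples below.
# "my-project", "my-bucket", and the file paths are placeholder values.
from google.cloud import storage

client = storage.Client(project="my-project")   # uses Application Default Credentials
bucket = client.get_bucket("my-bucket")         # raises NotFound if the bucket does not exist

blob = bucket.blob("reports/report.csv")        # object path inside the bucket
blob.upload_from_filename("report.csv")         # upload a local file
blob.download_to_filename("/tmp/report.csv")    # download it back to a local path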
Example #1
Source File: bq_writer.py From lookml-tools with Apache License 2.0 | 8 votes |
def _upload_to_gcs(self, gcs_project_id, target_bucket_name, bucket_folder, filename):
    '''upload CSV to file in GCS

    Args:
        gcs_project_id (str): project name
        target_bucket_name (str): name of GCS bucket
        bucket_folder (str): name of GCS folder
        filename (str): filepath to upload

    Returns:
        nothing. Side effect is that data is uploaded to GCS
    '''
    storage_client = storage.Client(gcs_project_id)
    bucket = storage_client.get_bucket(target_bucket_name)
    path = bucket_folder + os.sep + filename
    logging.info("Loading to GCS: %s", path)
    blob = bucket.blob(path)  # name in GCS
    blob.upload_from_filename(filename)
Example #2
Source File: manage_study.py From single_cell_portal with BSD 3-Clause "New" or "Revised" License | 6 votes |
def download_from_bucket(file_path):
    """Downloads file from Google Cloud Storage bucket"""
    path_segments = file_path[5:].split("/")

    storage_client = storage.Client()
    bucket_name = path_segments[0]
    bucket = storage_client.get_bucket(bucket_name)
    source = "/".join(path_segments[1:])
    blob = bucket.blob(source)
    destination = "/tmp/" + source.replace("/", "%2f")
    blob.download_to_filename(destination)
    print(f"{file_path} downloaded to {destination}.")
    return destination
Example #3
Source File: prepare_data.py From gpt2-ml with Apache License 2.0 | 6 votes |
def __init__(self, fn):
    self.fn = fn
    if fn.startswith('gs://'):
        from google.cloud import storage
        self.s3client = None
        self.gclient = storage.Client()
        self.storage_dir = TemporaryDirectory()
        self.writer = tf.python_io.TFRecordWriter(
            os.path.join(self.storage_dir.name, 'temp.tfrecord'))
        self.bucket_name, self.file_name = self.fn.split(
            'gs://', 1)[1].split('/', 1)
    else:
        self.s3client = None
        self.gclient = None
        self.bucket_name = None
        self.file_name = None
        self.storage_dir = None
        self.writer = tf.python_io.TFRecordWriter(fn)
Example #4
Source File: gs_model.py From model_server with Apache License 2.0 | 6 votes |
def gs_download_file(path):
    if path is None:
        return None
    parsed_path = urlparse(path)
    bucket_name = parsed_path.netloc
    file_path = parsed_path.path[1:]
    try:
        gs_client = storage.Client()
        bucket = gs_client.get_bucket(bucket_name)
    except exceptions.DefaultCredentialsError:
        logger.info('Switching to anonymous google storage client')
        gs_client = storage.Client.create_anonymous_client()
        bucket = gs_client.bucket(bucket_name, user_project=None)
    blob = bucket.blob(file_path)
    tmp_path = os.path.join('/tmp', file_path.split(os.sep)[-1])
    blob.download_to_filename(tmp_path)
    return tmp_path
Example #5
Source File: predict.py From model_server with Apache License 2.0 | 6 votes |
def get_local_file(source_path):
    parsed_path = urlparse(source_path)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        file_path = parsed_path.path[1:]
        file_name = os.path.split(parsed_path.path)[1]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
        except exceptions.DefaultCredentialsError:
            # if credentials fail, try to connect as anonymous user
            gs_client = storage.Client.create_anonymous_client()
            bucket = gs_client.bucket(bucket_name, user_project=None)
        blob = bucket.blob(file_path)
        blob.download_to_filename(file_name)
    elif parsed_path.scheme == "":
        # in case of local path just pass the input argument
        if os.path.isfile(source_path):
            file_name = source_path
        else:
            print("file " + source_path + " is not accessible")
            file_name = ""
    return file_name
Example #6
Source File: predict.py From model_server with Apache License 2.0 | 6 votes |
def upload_file(source_file, target_folder):
    parsed_path = urlparse(target_folder)
    if parsed_path.scheme == "gs":
        bucket_name = parsed_path.netloc
        folder_path = parsed_path.path[1:]
        try:
            gs_client = storage.Client()
            bucket = gs_client.get_bucket(bucket_name)
            blob = bucket.blob(folder_path + "/" + source_file)
            blob.upload_from_filename(source_file)
        except Exception as er:
            print(er)
            return False
    elif parsed_path.scheme == "":
        if target_folder != ".":
            copy(source_file, target_folder)
    return True
Example #7
Source File: helper.py From garage with MIT License | 6 votes |
def _upload_to_gcp_storage(exec_dir):
    """Upload all files to GCP storage under exec_dir folder.

    Args:
        exec_dir (str): The execution directory.

    """
    _bucket = storage.Client().bucket('resl-garage-benchmarks')
    exec_name = os.path.basename(exec_dir)

    for folder_name in os.listdir(exec_dir):
        folder_path = os.path.join(exec_dir, folder_name)
        if not os.path.isfile(folder_path):
            remote_folder = os.path.join(exec_name, folder_name)
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                if os.path.isfile(file_path):
                    blob = _bucket.blob(os.path.join(remote_folder, file_name))
                    blob.upload_from_filename(file_path)
Example #8
Source File: Utils.py From vimss with GNU General Public License v3.0 | 6 votes |
def upload_to_gcs(filenames, gcs_bucket_path):
    """Upload wave file to GCS, at provided path."""
    path_parts = gcs_bucket_path[5:].split('/', 1)
    bucket_name = path_parts[0]
    if len(path_parts) == 1:
        key_prefix = ''
    elif path_parts[1].endswith('/'):
        key_prefix = path_parts[1]
    else:
        key_prefix = path_parts[1] + '/'

    client = storage.Client(project=os.environ["PROJECT_NAME"])
    bucket = client.get_bucket(bucket_name)

    def _upload_files(filenames):
        """Upload a list of files into a specific subdirectory."""
        for i, filename in enumerate(filenames):
            blob = bucket.blob(key_prefix + os.path.basename(filename))
            blob.upload_from_filename(filename)
            if not i % 5:
                tf.logging.info('Finished uploading file: %s' % filename)

    _upload_files(filenames)
Example #9
Source File: gcp_report_downloader.py From koku with GNU Affero General Public License v3.0 | 6 votes |
def __init__(self, task, customer_name, billing_source, **kwargs):
    """
    Constructor.

    Args:
        task (Object): bound celery object
        customer_name (str): Name of the customer
        billing_source (dict): dict containing name of GCP storage bucket

    """
    super().__init__(task, **kwargs)

    self.bucket_name = billing_source["bucket"]
    self.report_prefix = billing_source.get("report_prefix", "")
    self.customer_name = customer_name.replace(" ", "_")
    self._provider_uuid = kwargs.get("provider_uuid")

    try:
        GCPProvider().cost_usage_source_is_reachable(None, billing_source)
        self._storage_client = storage.Client()
        self._bucket_info = self._storage_client.lookup_bucket(self.bucket_name)
    except ValidationError as ex:
        msg = f"GCP bucket {self.bucket_name} for customer {customer_name} is not reachable. Error: {str(ex)}"
        LOG.error(log_json(self.request_id, msg, self.context))
        raise GCPReportDownloaderError(str(ex))
Example #10
Source File: embedding.py From tensorboardX with MIT License | 6 votes |
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]
        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path

        if prefix == 's3':
            import boto3
            s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)
        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()
            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path)
Example #11
Source File: storage.py From loaner with Apache License 2.0 | 6 votes |
def from_config(cls, config, creds=None):
    """Returns an initialized CloudStorageAPI object.

    Args:
        config: common.ProjectConfig, the project configuration.
        creds: auth.CloudCredentials, the credentials to use for client
            authentication.

    Returns:
        An authenticated CloudStorageAPI instance.
    """
    if creds is None:
        creds = auth.CloudCredentials(config, cls.SCOPES)
    client = storage.Client(
        project=config.project,
        credentials=creds.get_credentials(cls.SCOPES))
    return cls(config, client)
Example #12
Source File: provider.py From koku with GNU Affero General Public License v3.0 | 6 votes |
def cost_usage_source_is_reachable(self, credential_name, data_source):
    """
    Verify that the GCP bucket exists and is reachable.

    Args:
        credential_name (object): not used; only present for interface compatibility
        data_source (dict): dict containing name of GCP storage bucket

    """
    storage_client = storage.Client()
    bucket = data_source["bucket"]
    try:
        bucket_info = storage_client.lookup_bucket(bucket)
        if not bucket_info:
            # if the lookup does not return anything, then this is a nonexistent bucket
            key = "billing_source.bucket"
            message = f"The provided GCP bucket {bucket} does not exist"
            raise serializers.ValidationError(error_obj(key, message))
    except GoogleCloudError as e:
        key = "billing_source.bucket"
        raise serializers.ValidationError(error_obj(key, e.message))

    return True
Example #13
Source File: vcf_file_composer.py From gcp-variant-transforms with Apache License 2.0 | 6 votes |
def __init__(self, project, bucket_name, blob_prefix):
    # type: (str, str, str) -> None
    """Initializes a `MultiProcessComposer`.

    This class composes all blobs that start with `blob_prefix` to one.

    Args:
        project: The project name.
        bucket_name: The name of the bucket where the blob components and the
            new composite are saved.
        blob_prefix: The prefix used to filter blobs. Only the blobs with this
            prefix will be composed.
    """
    self._project = project
    self._bucket_name = bucket_name
    self._blob_prefix = blob_prefix
    self._bucket = storage.Client(project).get_bucket(bucket_name)
Example #14
Source File: embedding.py From tensorboardX with MIT License | 6 votes |
def maybe_upload_file(local_path):
    '''Upload a file to remote cloud storage
    if the path starts with gs:// or s3://
    '''
    if local_path.startswith(('s3://', 'gs://')):
        prefix = local_path.split(':')[0]
        remote_bucket_path = local_path[len("s3://"):]  # same length
        bp = remote_bucket_path.split("/")
        bucket = bp[0]
        path = remote_bucket_path[1 + len(bucket):]
        # s3://example/file becomes s3:/example/file in Linux
        local_path = prefix + ':/' + remote_bucket_path

        if prefix == 's3':
            import boto3
            s3 = boto3.client('s3', endpoint_url=os.environ.get('S3_ENDPOINT'))
            s3.upload_file(local_path, bucket, path)
        elif prefix == 'gs':
            from google.cloud import storage
            client = storage.Client()
            Hbucket = storage.Bucket(client, bucket)
            blob = storage.Blob(path, Hbucket)
            blob.upload_from_filename(local_path)
Example #15
Source File: vcf_file_composer.py From gcp-variant-transforms with Apache License 2.0 | 6 votes |
def _compose_files(project, bucket_name, blob_names, composite_name):
    # type: (str, str, List[str], str) -> None
    """Composes multiple files (up to 32 objects) in GCS to one.

    Args:
        project: The project name.
        bucket_name: The name of the bucket where the `components` and the new
            composite are saved.
        blob_names: A list of blob object names.
        composite_name: Name of the new composite.
    """
    bucket = storage.Client(project).get_bucket(bucket_name)
    output_file_blob = bucket.blob(composite_name)
    output_file_blob.content_type = 'text/plain'
    blobs = [bucket.get_blob(blob_name) for blob_name in blob_names]
    output_file_blob.compose(blobs)
Example #16
Source File: kaggle_gcp.py From docker-python with Apache License 2.0 | 6 votes |
def init_gcs():
    is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ
    from google.cloud import storage
    if not is_user_secrets_token_set:
        return storage

    from kaggle_gcp import get_integrations
    if not get_integrations().has_gcs():
        return storage

    from kaggle_secrets import GcpTarget
    from kaggle_gcp import KaggleKernelCredentials
    monkeypatch_client(
        storage.Client,
        KaggleKernelCredentials(target=GcpTarget.GCS))
    return storage
Example #17
Source File: storage.py From InfraBox with Apache License 2.0 | 5 votes |
def _upload(self, stream, key):
    client = gcs.Client()
    bucket = client.get_bucket(self.bucket)
    blob = bucket.blob(key)
    blob.upload_from_file(stream)
Example #18
Source File: storage.py From InfraBox with Apache License 2.0 | 5 votes |
def exists(self, key):
    client = gcs.Client()
    bucket = client.get_bucket(self.bucket)
    blob = bucket.blob(key)
    return blob.exists()
Example #19
Source File: storage.py From InfraBox with Apache License 2.0 | 5 votes |
def _download(self, key):
    client = gcs.Client()
    bucket = client.get_bucket(self.bucket)
    blob = bucket.get_blob(key)

    if not blob:
        return None

    path = '/tmp/%s' % uuid.uuid4()
    with open(path, 'w+') as f:
        blob.download_to_file(f)

    self._clean_up(path)
    return path
Example #20
Source File: utils.py From recsys2019 with Apache License 2.0 | 5 votes |
def upload_data(src_path, dst_path):
    client = storage.Client()
    bucket = client.get_bucket("logicai-recsys2019")
    blob = bucket.blob(dst_path)
    blob.upload_from_filename(src_path)
Example #21
Source File: psi.py From python-script with Apache License 2.0 | 5 votes |
def save(url, report):
    '''Save to https://console.cloud.google.com/storage/browser/[bucket-id]/'''
    client = storage.Client()
    bucket = client.get_bucket("psi-report")
    blob = Blob(f"${parse.quote_plus(url)}.json", bucket)
    blob.upload_from_string(report, "application/json")
Example #22
Source File: utils.py From recsys2019 with Apache License 2.0 | 5 votes |
def download_data(src_path, dst_path):
    client = storage.Client()
    bucket = client.get_bucket("logicai-recsys2019")
    blob = bucket.get_blob(src_path)
    blob.download_to_filename(dst_path)
Example #23
Source File: imagenet_to_gcs.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def upload_to_gcs(training_records, validation_records):
    """Upload TF-Record files to GCS, at provided path."""

    # Find the GCS bucket_name and key_prefix for dataset files
    path_parts = FLAGS.gcs_output_path[5:].split('/', 1)
    bucket_name = path_parts[0]
    if len(path_parts) == 1:
        key_prefix = ''
    elif path_parts[1].endswith('/'):
        key_prefix = path_parts[1]
    else:
        key_prefix = path_parts[1] + '/'

    client = storage.Client(project=FLAGS.project)
    bucket = client.get_bucket(bucket_name)

    def _upload_files(filenames):
        """Upload a list of files into a specific subdirectory."""
        for i, filename in enumerate(sorted(filenames)):
            blob = bucket.blob(key_prefix + os.path.basename(filename))
            blob.upload_from_filename(filename)
            if not i % 20:
                tf.logging.info('Finished uploading file: %s' % filename)

    # Upload training dataset
    tf.logging.info('Uploading the training data.')
    _upload_files(training_records)

    # Upload validation dataset
    tf.logging.info('Uploading the validation data.')
    _upload_files(validation_records)
Example #24
Source File: simple_eval.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def run():
    """Get the models from GCS and then have them play each other."""
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_KEY_LOCATION)
    scoped_credentials = credentials.with_scopes(
        ['https://www.googleapis.com/auth/cloud-platform'])

    # Use the hand-crafted GCS client
    storage_client = storage.Client(credentials=credentials)
    bucket = storage_client.get_bucket(BUCKET_NAME)
    blobs = bucket.list_blobs(prefix=MODEL_DIR)

    models = []
    seen_models = set()
    model_reg = re.compile('\d{6}-\w+')
    for b in blobs:
        match = model_reg.search(b.name)
        if match and not match.group(0) in seen_models:
            seen_models.add(match.group(0))
            models.append(match.group(0))

    # Now that we have all the models, we can pit them against each other.
    # For now, just pick the last two.
    p1, p2 = None, None
    if len(models) == 0:
        sys.stderr.write('No models found!\n')
        sys.exit(1)
    elif len(models) == 1:
        p1, p2 = models[0], models[0]
    else:
        p1, p2 = models[-1], models[-2]

    play_matches(p1, p2)
Example #25
Source File: simple_eval.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def run():
    """Get the models from GCS and then have them play each other."""
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_KEY_LOCATION)
    scoped_credentials = credentials.with_scopes(
        ['https://www.googleapis.com/auth/cloud-platform'])

    # Use the hand-crafted GCS client
    storage_client = storage.Client(credentials=credentials)
    bucket = storage_client.get_bucket(BUCKET_NAME)
    blobs = bucket.list_blobs(prefix=MODEL_DIR)

    models = []
    seen_models = set()
    model_reg = re.compile('\d{6}-\w+')
    for b in blobs:
        match = model_reg.search(b.name)
        if match and not match.group(0) in seen_models:
            seen_models.add(match.group(0))
            models.append(match.group(0))

    # Now that we have all the models, we can pit them against each other.
    # For now, just pick the last two.
    p1, p2 = None, None
    if len(models) == 0:
        sys.stderr.write('No models found!\n')
        sys.exit(1)
    elif len(models) == 1:
        p1, p2 = models[0], models[0]
    else:
        p1, p2 = models[-1], models[-2]

    play_matches(p1, p2)
Example #26
Source File: gcs.py From airflow with Apache License 2.0 | 5 votes |
def get_conn(self):
    """
    Returns a Google Cloud Storage service object.
    """
    if not self._conn:
        self._conn = storage.Client(credentials=self._get_credentials(),
                                    client_info=self.client_info,
                                    project=self.project_id)

    return self._conn
Example #27
Source File: record_writer.py From tensorboardX with MIT License | 5 votes |
def __init__(self, path):
    if not GCS_ENABLED:
        raise ImportError("`google-cloud-storage` must be installed in order to use "
                          "the 'gs://' protocol")

    self.path = path
    self.buffer = io.BytesIO()

    client = storage.Client()
    bucket_name, filepath = self.bucket_and_path()
    bucket = storage.Bucket(client, bucket_name)
    self.blob = storage.Blob(filepath, bucket)
Example #28
Source File: record_writer.py From tensorboardX with MIT License | 5 votes |
def __init__(self, path):
    if not GCS_ENABLED:
        raise ImportError("`google-cloud-storage` must be installed in order to use "
                          "the 'gs://' protocol")

    self.path = path
    self.buffer = io.BytesIO()

    client = storage.Client()
    bucket_name, filepath = self.bucket_and_path()
    bucket = storage.Bucket(client, bucket_name)
    self.blob = storage.Blob(filepath, bucket)
Example #29
Source File: test_automl.py From docker-python with Apache License 2.0 | 5 votes |
def test_tables_gcs_client(self):
    # The GcsClient can't currently be monkeypatched for default
    # credentials because it requires a project which can't be set.
    # Verify that creating an automl_v1beta1.GcsClient given an actual
    # storage.Client sets the client properly.
    gcs_client = storage.Client(project="xyz", credentials=_make_credentials())
    tables_gcs_client = automl_v1beta1.GcsClient(client=gcs_client)
    self.assertIs(tables_gcs_client.client, gcs_client)
Example #30
Source File: test_gcs.py From docker-python with Apache License 2.0 | 5 votes |
def test_default_credentials_gcs_enabled(self):
    env = EnvironmentVarGuard()
    env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar')
    env.set('KAGGLE_KERNEL_INTEGRATIONS', 'GCS')
    with env:
        init_gcs()
        client = storage.Client(project="xyz")
        self.assertIsInstance(client._credentials, KaggleKernelCredentials)
        self.assertTrue(client._connection.user_agent.startswith("kaggle-gcp-client/1.0"))