Python google.cloud.storage.Blob() Examples

The following code examples show how to use google.cloud.storage.Blob(). They are taken from open source Python projects.

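Before the project examples, here is a minimal sketch of the common pattern most of them share: create a Client, get a Bucket, wrap an object name in a Blob, then upload or download. The bucket and object names below are placeholders, not taken from any of the projects.

from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket("my-example-bucket")  # placeholder bucket name

# Constructing a Blob is a local operation; no API request is made yet.
blob = storage.Blob("path/to/object.txt", bucket)

# Upload a string, then read it back.
blob.upload_from_string("hello world", content_type="text/plain")
data = blob.download_as_string()

# Check existence and delete the object.
if blob.exists(client):
    blob.delete(client=client)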
Example 1
Project: content   Author: demisto   File: GoogleCloudStorage.py    MIT License
def blob2dict(blob):
    """Converts a google.cloud.storage.Blob (which represents a storage object) to context format (GCS.BucketObject)."""
    return {
        'Name': blob.name,
        'Bucket': blob.bucket.name,
        'ContentType': blob.content_type,
        'TimeCreated': datetime2str(blob.time_created),
        'TimeUpdated': datetime2str(blob.updated),
        'TimeDeleted': datetime2str(blob.time_deleted),
        'Size': blob.size,
        'MD5': blob.md5_hash,
        'OwnerID': '' if not blob.owner else blob.owner.get('entityId', ''),
        'CRC32c': blob.crc32c,
        'EncryptionAlgorithm': blob._properties.get('customerEncryption', {}).get('encryptionAlgorithm', ''),
        'EncryptionKeySHA256': blob._properties.get('customerEncryption', {}).get('keySha256', ''),
    } 
Example 2
Project: eclipse2017   Author: google   File: pipeline.py    Apache License 2.0
def upload(self, fnames):
        """
        Uploads a list of Movie entities to the datastore and uploads the
        corresponding movie files to Cloud Storage.
        """

        # Name movies based on time created
        movie_dir = datetime.now().strftime("%Y-%m-%d %H:%M")
        movie_name = 'movie-{0}.mp4'.format(movie_dir)

        # Upload movie to Cloud Storage
        bucket = self.storage.get_bucket(config.GCS_MOVIE_BUCKET)
        blob = storage.Blob('{0}/{1}'.format(movie_dir, movie_name), bucket)

        with open(constants.MOVIE_FPATH, 'rb') as f:
            try:
                blob.upload_from_file(f)
                msg = 'Successfully uploaded {0} to Cloud Storage'
                logging.info(msg.format(constants.MOVIE_FPATH))
            except Exception as e:
                msg = 'Failed to upload {0} to Cloud Storage: {1}'
                logging.error(msg.format(constants.MOVIE_FPATH, e))
                return False 
Example 3
Project: loaner   Author: google   File: storage.py    Apache License 2.0
def get_blob(self, path, bucket_name=None):
    """Retrieves a json encoded Blob from Google Cloud Storage as a dictionary.

    Args:
      path: str, the path of the Blob to retrieve relative to the root of the
          Google Cloud Storage Bucket including the name of the Blob.
      bucket_name: str, the name of the Google Cloud Storage Bucket to retrieve
          the Blob from.

    Returns:
      A dictionary of the Blob from Google Cloud Storage.

    Raises:
      NotFoundError: when the path provided is not associated with a Blob in the
          Google Cloud Storage Bucket.
    """
    bucket_name = bucket_name or self._config.bucket

    blob = self.get_bucket(bucket_name).get_blob(path, self._client)

    try:
      contents = blob.download_as_string(self._client)
    except (AttributeError, exceptions.NotFound) as err:
      logging.error(_GET_BLOB_ERROR_MSG, path, bucket_name, err)
      raise NotFoundError(_GET_BLOB_ERROR_MSG % (path, bucket_name, err))
    return json.loads(contents) 
Example 4
Project: servicelayer   Author: alephdata   File: gs.py    MIT License
def _locate_blob(self, content_hash):
        """Check if a file with the given hash exists on S3."""
        if content_hash is None:
            return
        prefix = self._get_prefix(content_hash)
        if prefix is None:
            return

        # First, check the standard file name:
        blob = Blob(os.path.join(prefix, 'data'), self.bucket)
        if blob.exists():
            return blob

        # Second, iterate over all file names:
        for blob in self.bucket.list_blobs(max_results=1, prefix=prefix):
            return blob 
Example 5
Project: servicelayer   Author: alephdata   File: gs.py    MIT License
def archive_file(self, file_path, content_hash=None, mime_type=None):
        """Store the file located at the given path on Google, based on a path
        made up from its SHA1 content hash."""
        file_path = ensure_path(file_path)
        if content_hash is None:
            content_hash = checksum(file_path)

        if content_hash is None:
            return

        file_path = ensure_posix_path(file_path)
        for attempt in service_retries():
            try:
                blob = self._locate_blob(content_hash)
                if blob is not None:
                    return content_hash

                path = os.path.join(self._get_prefix(content_hash), 'data')
                blob = Blob(path, self.bucket)
                blob.upload_from_filename(file_path, content_type=mime_type)
                return content_hash
            except FAILURES as exc:
                log.error("Store error: %s", exc)
                backoff(failures=attempt) 
Example 6
Project: turbinia   Author: google   File: output_manager.py    Apache License 2.0
def copy_to(self, source_path):
    if os.path.getsize(source_path) == 0:
      message = (
          'Local source file {0:s} is empty.  Not uploading to GCS'.format(
              source_path))
      log.error(message)
      raise TurbiniaException(message)

    bucket = self.client.get_bucket(self.bucket)
    destination_path = os.path.join(
        self.base_output_dir, self.unique_dir, os.path.basename(source_path))
    log.info(
        'Writing {0:s} to GCS path {1:s}'.format(source_path, destination_path))
    try:
      blob = storage.Blob(destination_path, bucket, chunk_size=self.CHUNK_SIZE)
      blob.upload_from_filename(source_path, client=self.client)
    except exceptions.GoogleCloudError as exception:
      message = 'File upload to GCS failed: {0!s}'.format(exception)
      log.error(message)
      raise TurbiniaException(message)
    return os.path.join('gs://', self.bucket, destination_path) 
Example 7
Project: healthcare-deid   Author: GoogleCloudPlatform   File: server.py    Apache License 2.0
def verify_gcs_path(path):
  """Verifies that a GCS path exists.

  Args:
    path: A string that represents the target path.
  Returns:
    A boolean of the verification status.
  """
  storage_client = storage.Client()
  path_info = gcsutil.GcsFileName.from_path(path)
  try:
    bucket = storage_client.get_bucket(path_info.bucket)
  except exceptions.NotFound:
    return False
  return storage.Blob(bucket=bucket,
                      name=path_info.blob).exists(storage_client) 
Example 8
Project: neural-fingerprinting   Author: StephanZheng   File: cloud_client.py    BSD 3-Clause "New" or "Revised" License
def get_blob(self, blob_name):
    """Gets google.cloud.storage.blob.Blob object by blob name."""
    return self.bucket.get_blob(blob_name) 
Example 9
Project: neural-fingerprinting   Author: StephanZheng   File: cloud_client.py    BSD 3-Clause "New" or "Revised" License
def new_blob(self, blob_name):
    """Creates new storage blob with provided name."""
    return storage.Blob(blob_name, self.bucket) 
Example 10
Project: ansible_collections_google   Author: ansible-collections   File: gcp_storage_object.py    GNU General Public License v3.0
def download_file(module, client, name, dest):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(name, bucket)
        with open(dest, "wb") as file_obj:
            blob.download_to_file(file_obj)
        return blob_to_dict(blob)
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e)) 
Example 11
Project: ansible_collections_google   Author: ansible-collections   File: gcp_storage_object.py    GNU General Public License v3.0
def upload_file(module, client, src, dest):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(dest, bucket)
        with open(src, "r") as file_obj:
            blob.upload_from_file(file_obj)
        return blob_to_dict(blob)
    except google.cloud.exceptions.GoogleCloudError as e:
        module.fail_json(msg=str(e)) 
Example 12
Project: ansible_collections_google   Author: ansible-collections   File: gcp_storage_object.py    GNU General Public License v3.0
def delete_file(module, client, name):
    try:
        bucket = client.get_bucket(module.params['bucket'])
        blob = Blob(name, bucket)
        blob.delete()
        return {}
    except google.cloud.exceptions.NotFound as e:
        module.fail_json(msg=str(e)) 
Example 13
Project: cloudygo   Author: sethtroisi   File: cloudygo.py    Apache License 2.0
def __get_gs_game(self, bucket, model_name, filename, view_type):
        assert 'full' in view_type, view_type


        # Maybe it's worth caching these; for now, just globally rate limit
        now = time.time()
        if now - self.last_cloud_request < 1:
            return None
        self.last_cloud_request = now

        # NOTE: needs to be before cloud_bucket clears bucket.
        from google.cloud import storage
        cloud_bucket = CloudyGo.get_cloud_bucket(bucket)
        if bucket not in self.storage_clients:
            client = storage.Client(project="minigo-pub").bucket(cloud_bucket)
            self.storage_clients[bucket] = client

        # MINIGO-HACK
        if bucket in CloudyGo.MINIGO_TS:
            # Take a guess based on the timestamp
            hour_guess = CloudyGo.guess_hour_dir(filename)
            model_name = hour_guess

            path = os.path.join('sgf', 'full', hour_guess, filename)
            if cloud_bucket == CloudyGo.FULL_GAME_CLOUD_BUCKET:
                # MINIGO_PUB has an outer folder of the bucket name
                path = os.path.join(bucket, path)
        else:
            path = os.path.join(bucket, 'sgf', model_name, 'full', filename)

        blob = self.storage_clients[bucket].get_blob(path)
        print("Checking {}: {}".format(filename, blob is not None))
        print(self.storage_clients[bucket], path)
        if not isinstance(blob, storage.Blob):
            return None

        data = blob.download_as_string().decode('utf8')
        return data 
Example 14
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    Apache License 2.0
def _compose_vcf_data_files(project, vcf_data_files_folder):
  # type: (str, str) -> storage.Blob
  """Composes multiple VCF data files to one VCF data file.

  Args:
    project: The project name.
    vcf_data_files_folder: The folder that contains all VCF data files.
  """
  bucket_name, blob_prefix = gcsio.parse_gcs_path(vcf_data_files_folder)
  multi_process_composer = MultiProcessComposer(project, bucket_name,
                                                blob_prefix)
  return multi_process_composer.get_composed_blob() 
Example 15
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    Apache License 2.0
def _create_blob(client, file_path):
  # type: (storage.Client, str) -> storage.Blob
  bucket_name, blob_name = gcsio.parse_gcs_path(file_path)
  file_blob = client.get_bucket(bucket_name).blob(blob_name)
  file_blob.content_type = 'text/plain'
  return file_blob 
Example 16
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    Apache License 2.0
def get_composed_blob(self):
    # type: () -> storage.Blob
    """Returns the final blob that all blobs composed to."""
    return self._compose_blobs_to_one(self._blob_prefix) 
Example 17
Project: loaner   Author: google   File: storage.py    Apache License 2.0
def insert_blob(self, path, contents, bucket_name=None):
    """Inserts a new json encoded Blob in the Cloud Storage bucket provided.

    NOTE: If a Blob already exists at the provided path it will be overwritten
    by the new contents without warning.

    Args:
      path: str, the path of the Blob to create relative to the root of the
          Google Cloud Storage Bucket including the name of the Blob.
      contents: dict, a dictionary representing the contents of the new Blob.
      bucket_name: str, the name of the Google Cloud Storage Bucket to insert
          the new Blob into.
    """
    bucket_name = bucket_name or self._config.bucket

    blob = storage.Blob(
        name=path,
        bucket=self.get_bucket(bucket_name),
    )

    blob.upload_from_string(
        data=json.dumps(contents),
        content_type='application/json',
        client=self._client,
    )

    logging.info(
        'Successfully uploaded blob %r to bucket %r.', path, bucket_name) 
Example 18
Project: lm-human-preferences   Author: openai   File: gcs.py    MIT License
def upload_contents(url, contents, client=None):
    """Given a gs:// path, returns contents of the corresponding blob."""
    if client is None:
        client = storage.Client()
    bucket_name, path = parse_url(url)
    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(path, bucket)
    blob.upload_from_string(contents) 
Example 19
Project: analysis-py-utils   Author: verilylifesciences   File: bq_test.py    Apache License 2.0
def test_export_table(self,
                          out_fmt,  # type: str
                          compression,  # type: bool
                          dir_in_bucket,  # type: str
                          output_ext,  # type: str
                          explicit_filename,  # type: str
                          expected_output_path,  # type: str
                          support_multifile_export,  # type: bool
                          test_description  # type: str
                          ):
        # type: (...) -> None
        """Test ExportTableToBucket
        Args:
            out_fmt: Output format. Must be one of {'csv', 'json', 'avro'}
            compression: Whether to compress file using GZIP. Cannot be applied to avro
            dir_in_bucket: The directory in the bucket to store the output files
            output_ext: Extension of the output file name
            explicit_filename: Explicitly specified filename.
            expected_output_path: Expected output path
            support_multifile_export: Whether multi-file export is supported
            test_description: A description of the test
        """

        fnames = self.client.export_table_to_bucket(
            self.src_table_name, self.temp_bucket_name, dir_in_bucket, out_fmt, compression,
            output_ext, support_multifile_export=support_multifile_export,
            explicit_filename=explicit_filename)

        # Test that the output file name is returned (everything after the last "/" of the path).
        self.assertEqual(fnames, [expected_output_path.rsplit('/', 1)[-1]])

        # Test that the object is in the bucket.
        self.assertTrue(
                isinstance(self.bucket.get_blob(expected_output_path), storage.Blob),
                           test_description +
                           ': File {} is not in {}'.format(expected_output_path,
                                                           str([x for x in self.bucket.list_blobs()]))) 
Example 20
Project: analysis-py-utils   Author: verilylifesciences   File: bq_test.py    Apache License 2.0
def test_export_schema(self,
                           dir_in_bucket,  # type: str
                           output_ext,  # type: str
                           explicit_filename,  # type: str
                           expected_schema_path,  # type: str
                           test_description  # type:str
                           ):
        # type: (str, str, str, str, str) -> None
        """Test ExportSchemaToBucket
         Args:
            dir_in_bucket: The directory in the bucket to store the output files
            output_ext: Extension of the output file name
            explicit_filename: Explicitly specified filename.
            expected_schema_path: Expected output path of the schema file
            test_description: A description of the test
        """

        fname = self.client.export_schema_to_bucket(self.src_table_name, self.temp_bucket_name,
                                                    dir_in_bucket, output_ext,
                                                    explicit_filename=explicit_filename)

        # Test that the output file name is returned (everything after the last "/" of the path).
        self.assertEqual(fname, expected_schema_path.rsplit('/', 1)[-1])

        # Test that the object is in the bucket.
        self.assertTrue(
                isinstance(self.bucket.get_blob(expected_schema_path), storage.Blob),
                test_description) 
Example 21
Project: delta   Author: celskeggs   File: remote.py    MIT License
def get_blob(object):
    return storage.Blob(name=object, bucket=get_ref()) 
Example 22
Project: tensorboardX   Author: lanpa   File: record_writer.py    MIT License
def __init__(self, path):
        if not GCS_ENABLED:
            raise ImportError("`google-cloud-storage` must be installed in order to use "
                              "the 'gs://' protocol")

        self.path = path
        self.buffer = io.BytesIO()

        from google.cloud import storage
        client = storage.Client()

        bucket_name, filepath = self.bucket_and_path()
        bucket = storage.Bucket(client, bucket_name)
        self.blob = storage.Blob(filepath, bucket) 
Example 23
Project: tensorboardX   Author: lanpa   File: record_writer.py    MIT License
def __init__(self, path):
        if not GCS_ENABLED:
            raise ImportError("`google-cloud-storage` must be installed in order to use "
                              "the 'gs://' protocol")

        self.path = path
        self.buffer = io.BytesIO()

        from google.cloud import storage
        client = storage.Client()

        bucket_name, filepath = self.bucket_and_path()
        bucket = storage.Bucket(client, bucket_name)
        self.blob = storage.Blob(filepath, bucket) 
Example 24
Project: airflow   Author: apache   File: test_gcs.py    Apache License 2.0
def test_copy(self, mock_service, mock_bucket):
        source_bucket = 'test-source-bucket'
        source_object = 'test-source-object'
        destination_bucket = 'test-dest-bucket'
        destination_object = 'test-dest-object'

        destination_bucket_instance = mock_bucket
        source_blob = mock_bucket.blob(source_object)
        destination_blob = storage.Blob(
            bucket=destination_bucket_instance,
            name=destination_object)

        # Given
        bucket_mock = mock_service.return_value.bucket
        bucket_mock.return_value = mock_bucket
        copy_method = bucket_mock.return_value.copy_blob
        copy_method.return_value = destination_blob

        # When
        response = self.gcs_hook.copy(  # pylint: disable=assignment-from-no-return
            source_bucket=source_bucket,
            source_object=source_object,
            destination_bucket=destination_bucket,
            destination_object=destination_object
        )

        # Then
        self.assertEqual(response, None)
        copy_method.assert_called_once_with(
            blob=source_blob,
            destination_bucket=destination_bucket_instance,
            new_name=destination_object
        ) 
Example 25
Project: airflow   Author: apache   File: test_gcs.py    Apache License 2.0
def test_delete(self, mock_service, mock_bucket):
        test_bucket = 'test_bucket'
        test_object = 'test_object'
        blob_to_be_deleted = storage.Blob(name=test_object, bucket=mock_bucket)

        get_bucket_method = mock_service.return_value.get_bucket
        get_blob_method = get_bucket_method.return_value.get_blob
        delete_method = get_blob_method.return_value.delete
        delete_method.return_value = blob_to_be_deleted

        response = self.gcs_hook.delete(  # pylint: disable=assignment-from-no-return
            bucket_name=test_bucket,
            object_name=test_object)
        self.assertIsNone(response) 
Example 26
Project: airflow   Author: apache   File: gcs.py    Apache License 2.0
def delete(self, bucket_name, object_name):
        """
        Deletes an object from the bucket.

        :param bucket_name: name of the bucket, where the object resides
        :type bucket_name: str
        :param object_name: name of the object to delete
        :type object_name: str
        """
        client = self.get_conn()
        bucket = client.bucket(bucket_name)
        blob = bucket.blob(blob_name=object_name)
        blob.delete()

        self.log.info('Blob %s deleted.', object_name) 
Example 27
Project: airflow   Author: apache   File: gcs.py    Apache License 2.0
def _calculate_sync_destination_path(
        self,
        blob: storage.Blob,
        destination_object: Optional[str],
        source_object_prefix_len: int
    ) -> str:
        return (
            path.join(destination_object, blob.name[source_object_prefix_len:])
            if destination_object
            else blob.name[source_object_prefix_len:]
        ) 
Example 28
Project: airflow   Author: apache   File: gcs.py    Apache License 2.0
def _prepare_sync_plan(
        source_bucket: storage.Bucket,
        destination_bucket: storage.Bucket,
        source_object: Optional[str],
        destination_object: Optional[str],
        recursive: bool,
    ) -> Tuple[Set[storage.Blob], Set[storage.Blob], Set[storage.Blob]]:
        # Calculate the number of characters to remove from the name, because they contain information
        # about the parent's path
        source_object_prefix_len = len(source_object) if source_object else 0
        destination_object_prefix_len = len(destination_object) if destination_object else 0
        delimiter = "/" if not recursive else None
        # Fetch blobs list
        source_blobs = list(source_bucket.list_blobs(prefix=source_object, delimiter=delimiter))
        destination_blobs = list(
            destination_bucket.list_blobs(prefix=destination_object, delimiter=delimiter))
        # Create indexes that allow you to identify blobs based on their name
        source_names_index = {a.name[source_object_prefix_len:]: a for a in source_blobs}
        destination_names_index = {a.name[destination_object_prefix_len:]: a for a in destination_blobs}
        # Create sets with names without parent object name
        source_names = set(source_names_index.keys())
        destination_names = set(destination_names_index.keys())
        # Determine objects to copy and delete
        to_copy = source_names - destination_names
        to_delete = destination_names - source_names
        to_copy_blobs = {source_names_index[a] for a in to_copy}  # type: Set[storage.Blob]
        to_delete_blobs = {destination_names_index[a] for a in to_delete}  # type: Set[storage.Blob]
        # Find names that are in both buckets
        names_to_check = source_names.intersection(destination_names)
        to_rewrite_blobs = set()  # type: Set[storage.Blob]
        # Compare objects based on crc32c
        for current_name in names_to_check:
            source_blob = source_names_index[current_name]
            destination_blob = destination_names_index[current_name]
            # if the objects are different, save it
            if source_blob.crc32c != destination_blob.crc32c:
                to_rewrite_blobs.add(source_blob)
        return to_copy_blobs, to_delete_blobs, to_rewrite_blobs 
Example 29
Project: driblet   Author: google   File: setup_cloud.py    Apache License 2.0
def _blob_exists(self, bucket, blob):
    """Checks if given Cloud Storage blob exists.

    Args:
      bucket: Cloud Storage bucket object.
      blob: Cloud Storage blob object.

    Returns:
      True if blob exists, False otherwise.
    """
    blob = storage.Blob(bucket=bucket, name=blob.name)
    return blob.exists(self._client)
Example 30
Project: python-script   Author: 9468305   File: psi.py    Apache License 2.0
def save(url, report):
    '''Save to https://console.cloud.google.com/storage/browser/[bucket-id]/'''
    client = storage.Client()
    bucket = client.get_bucket("psi-report")
    blob = Blob(f"${parse.quote_plus(url)}.json", bucket)
    blob.upload_from_string(report, "application/json") 
Example 31
Project: docuploader   Author: googleapis   File: upload.py    Apache License 2.0
def upload(
    *, source: str, destination: str, bucket: str, credentials_file: str
) -> storage.Blob:
    client = _make_client(credentials_file)
    bucket_ = client.get_bucket(bucket)
    blob = bucket_.blob(destination)
    blob.upload_from_filename(filename=source)
    return blob 
Example 32
Project: cohorts   Author: hammerlab   File: gcloud_storage.py    Apache License 2.0
def upload_file(self, localpath, gsuri):
        # And now request the handles for bucket and the file
        bucket_name, rel_path = self.parse_uri(gsuri)
        bucket = self.client.get_bucket(bucket_name)
        ublob = storage.Blob(rel_path, bucket)
        ublob.upload_from_filename(localpath) 
Example 33
Project: cohorts   Author: hammerlab   File: gcloud_storage.py    Apache License 2.0
def __exit__(self, *args):
        # We are done with the open file, so let's close it
        self._localfile.close()
        # If write mode is on, then upload the altered one to GS
        if self.is_write:
            bucket_name, rel_path = self.gcio.parse_uri(self.gsuri)
            bucket = self.gcio.client.get_bucket(bucket_name)
            ublob = storage.Blob(rel_path, bucket)
            ublob.upload_from_filename(self._localfile_path)
        # And because of the type of the temp file we created,
        # we are responsible for deleting it when we are finished:
        os.remove(self._localfile_path) 
Example 34
Project: content   Author: demisto   File: GoogleCloudStorage.py    MIT License
def gcs_download_file():
    bucket_name = demisto.args()['bucket_name']
    blob_name = demisto.args()['object_name']
    saved_file_name = demisto.args().get('saved_file_name', '')

    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(blob_name, bucket)
    saved_file_name = download_blob(blob, saved_file_name)

    demisto.results(file_result_existing_file(saved_file_name)) 
Example 35
Project: content   Author: demisto   File: GoogleCloudStorage.py    MIT License
def get_blob_acl(bucket_name, blob_name):
    bucket = client.get_bucket(bucket_name)
    blob = storage.Blob(blob_name, bucket)
    return blob.acl 
Example 36
Project: pulse-data   Author: Recidiviz   File: gcsfs_path.py    GNU General Public License v3.0
def from_blob(
            cls,
            blob: storage.Blob
    ) -> Union['GcsfsFilePath', 'GcsfsDirectoryPath']:
        # storage.Blob and storage.Bucket names are url-escaped, we want to
        # convert back to unescaped strings.
        return GcsfsPath.from_bucket_and_blob_name(
            bucket_name=unquote(blob.bucket.name),
            blob_name=unquote(blob.name)) 
Example 37
Project: python-docs-samples   Author: GoogleCloudPlatform   File: encryption_test.py    Apache License 2.0
def test_blob():
    """Provides a pre-existing blob in the test bucket."""
    bucket = storage.Client().bucket(BUCKET)
    blob = Blob(
        "encryption_test_sigil",
        bucket,
        encryption_key=TEST_ENCRYPTION_KEY_DECODED,
    )
    content = "Hello, is it me you're looking for?"
    blob.upload_from_string(content)
    return blob.name, content 
Example 38
Project: stoq-plugins-public   Author: PUNCH-Cyber   File: gcs.py    Apache License 2.0
def get(self, task: ArchiverResponse) -> Payload:
        """
        Retrieve archived payload from gcs

        """
        meta = PayloadMeta(
            extra_data={
                'bucketId': task.results['bucketId'],
                'objectId': task.results['objectId'],
                'projectId': task.results['projectId'],
            }
        )
        count = 0
        client = Client(project=task.results['projectId'])
        while count < self.max_retries:
            try:
                bucket = client.get_bucket(task.results['bucketId'])
                blob = Blob(task.results['objectId'], bucket)
                content = BytesIO()
                blob.download_to_file(content)
                break
            except (
                InvalidResponse,
                GoogleAPICallError,
                InternalServerError,
                SSLError,
            ) as e:
                if count >= self.max_retries:
                    raise StoqPluginException(
                        f'Failed to download {task.results["bucketId"]}/{task.results["objectId"]} from GCS: {str(e)}'
                    )
                count += 1
                sleep(randrange(0, 4))
        content.seek(0)
        data = content.read()
        if self.use_encryption:
            data = self._decrypt(data)
        return Payload(data, meta) 
Example 39
Project: stoq-plugins-public   Author: PUNCH-Cyber   File: gcs.py    Apache License 2.0
def _upload(self, payload: bytes, filename: str, bucket: str) -> None:
        """
        Upload a payload to GCS

        """

        client = Client(project=self.project_id)
        count = 0
        while count < self.max_retries:
            try:
                bucket_obj = client.get_bucket(bucket)
                if self.use_encryption:
                    payload = self._encrypt(payload)
                content = BytesIO(payload)
                blob = Blob(filename, bucket_obj)
                blob.upload_from_file(content)
                break
            except (
                InvalidResponse,
                GoogleAPICallError,
                InternalServerError,
                SSLError,
            ) as e:
                if count >= self.max_retries:
                    raise StoqPluginException(
                        f'Failed to upload {bucket}/{filename} to GCS: {str(e)}'
                    )
                count += 1
                sleep(randrange(0, 4)) 
Example 40
Project: genometools   Author: flo-compbio   File: storage.py    GNU General Public License v3.0
def upload_file(client, bucket, local_path, remote_path, overwrite=False):
    """Uploads a file to a bucket.
    
    TODO: docstring"""
    bucket = client.get_bucket(bucket)
    blob = storage.Blob(remote_path, bucket)
    if (not overwrite) and blob.exists():
        raise Conflict('File/object already exists on the bucket!')
    blob.upload_from_filename(local_path) 
Example 41
Project: eclipse2017   Author: google   File: uploader.py    Apache License 2.0
def _upload_derived(derived_file, bucket):
    blob = storage.Blob(os.path.basename(derived_file), bucket)

    # Upload derived file
    try:
        blob.upload_from_filename(derived_file)
        msg = 'Successfully uploaded derived {0} to GCS'
        logging.info(msg.format(derived_file))

    except Exception as e:
        msg = 'Derived {0} failed to upload to GCS: {1}'
        logging.error(msg.format(derived_file, e))
        return False
    return True 
Example 42
Project: google-pandas-load   Author: augustin-barillec   File: loader.py    MIT License
def _local_file_to_blob(self, local_file_path):
        local_file_basename = os.path.basename(local_file_path)
        if self.gs_dir_path is None:
            blob_name = local_file_basename
        else:
            blob_name = self.gs_dir_path + '/' + local_file_basename
        blob = storage.Blob(name=blob_name,
                            bucket=self.bucket,
                            chunk_size=self._chunk_size)
        blob.upload_from_filename(filename=local_file_path) 
Example 43
Project: gcp-variant-transforms   Author: googlegenomics   File: vcf_file_composer.py    Apache License 2.0
def _compose_blobs_to_one(self, blob_prefix):
    # type: (str) -> storage.Blob
    """Composes multiple blobs with prefix `blob_prefix` in GCS to one.

    Note that Cloud Storage allows composing up to 32 objects at a time. This
    method composes the blobs recursively until only one file remains.

    Args:
      blob_prefix: the prefix used to filter blobs. Only the files with this
        prefix will be composed.

    Returns:
      The final blob that all blobs with `blob_prefix` composed to.
    """
    blobs_to_be_composed = list(self._bucket.list_blobs(prefix=blob_prefix))
    logging.info('Total number of blobs is %d.', len(blobs_to_be_composed))
    if not blobs_to_be_composed:
      raise RuntimeError('No VCF shards found.')
    if len(blobs_to_be_composed) == 1:
      return blobs_to_be_composed[0]
    new_blob_prefix = filesystems.FileSystems.join(blob_prefix, 'composed_')
    blobs_to_compose_args = []
    for blob_names in self._break_list_in_chunks(blobs_to_be_composed,
                                                 _MAX_NUM_OF_BLOBS_PER_COMPOSE):
      _, file_name = filesystems.FileSystems.split(blob_names[0])
      new_blob_name = ''.join([new_blob_prefix, file_name])
      blobs_to_compose_args.append(
          (self._project, self._bucket_name, blob_names, new_blob_name))

    num_retries = 0
    while num_retries <= _MAX_NUM_OF_COMPOSE_RETRIES:
      proc_pool = multiprocessing.Pool(processes=8)
      results = []
      for arg in blobs_to_compose_args:
        results.append(proc_pool.apply_async(func=_compose_files, args=arg))
      proc_pool.close()

      failed_blobs_to_compose_args = []
      for result, argument in zip(results, blobs_to_compose_args):
        try:
          result.get(_COMPOSE_TIMEOUT_SECONDS)
        except multiprocessing.TimeoutError:
          logging.warning('Aborting the composing of blobs (%s to %s) due to '
                          'timeout.', argument[2][0], argument[2][-1])
          failed_blobs_to_compose_args.append(argument)

      if failed_blobs_to_compose_args:
        num_retries += 1
        blobs_to_compose_args = failed_blobs_to_compose_args
        logging.warning(
            '%d jobs of composing of blobs failed due to timeout. Retrying '
            '%d of %d.', len(blobs_to_compose_args), num_retries,
            _MAX_NUM_OF_COMPOSE_RETRIES)
      else:
        break
    else:
      raise RuntimeError('Composing of blobs failed after {} '
                         'retries.'.format(_MAX_NUM_OF_COMPOSE_RETRIES))
    return self._compose_blobs_to_one(new_blob_prefix) 
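The recursion above exists because a single Cloud Storage compose request accepts at most 32 source objects, which is why the helper chunks blobs with _MAX_NUM_OF_BLOBS_PER_COMPOSE before recursing. For reference, a minimal sketch of one compose call with the client library (bucket and object names are hypothetical):

from google.cloud import storage

client = storage.Client()
bucket = client.get_bucket("my-example-bucket")  # hypothetical bucket

# Compose up to 32 source blobs into a single destination blob.
sources = [bucket.blob("shards/part-0000"), bucket.blob("shards/part-0001")]
destination = bucket.blob("composed/output.vcf")
destination.content_type = "text/plain"
destination.compose(sources)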
Example 44
Project: turbinia   Author: google   File: output_manager.py    Apache License 2.0
def copy_from(self, source_path):
    """Copies output file from the managed location to the local output dir.

    Args:
      source_path (string): A path to a source file in the managed storage
          location.  This path should be in a format matching the storage type
          (e.g. GCS paths are formatted like 'gs://bucketfoo/' and local paths
          are like '/foo/bar').

    Returns:
      The path the file was saved to, or None if file was not written.

    Raises:
      TurbiniaException: If file retrieval fails.
    """
    bucket = self.client.get_bucket(self.bucket)
    gcs_path = self._parse_gcs_path(source_path)[1]
    destination_path = os.path.join(
        self.local_output_dir, os.path.basename(source_path))
    log.info(
        'Writing GCS file {0:s} to local path {1:s}'.format(
            source_path, destination_path))
    try:
      blob = storage.Blob(gcs_path, bucket, chunk_size=self.CHUNK_SIZE)
      blob.download_to_filename(destination_path, client=self.client)
    except exceptions.RequestRangeNotSatisfiable as exception:
      message = (
          'File retrieval from GCS failed, file may be empty: {0!s}'.format(
              exception))
      log.error(message)
      raise TurbiniaException(message)
    except exceptions.GoogleCloudError as exception:
      message = 'File retrieval from GCS failed: {0!s}'.format(exception)
      log.error(message)
      raise TurbiniaException(message)

    if not os.path.exists(destination_path):
      message = (
          'File retrieval from GCS failed: Local file {0:s} does not '
          'exist'.format(destination_path))
      log.error(message)
      raise TurbiniaException(message)
    return destination_path 
Example 45
Project: eclipse2017   Author: google   File: pipeline.py    Apache License 2.0
def upload(self, fnames):
        uploaded_files = []

        bucket = self.storage.get_bucket(config.GCS_PROCESSED_PHOTOS_BUCKET)
        batch = self.datastore.batch()
        batch.begin()

        for fname in fnames:
            name, ext = os.path.splitext(fname)
            fpath = '{0}/{1}{2}'.format(constants.IMAGE_PROCESSOR_DATA_DIR, name, ext)
            objname = '{0}{1}'.format(name, ext)
            blob = storage.Blob(objname, bucket)
            try:
                blob.upload_from_file(open(fpath, "rb"))
                uploaded_files.append(fname)
                msg = 'Successfully uploaded {0} to Cloud Storage'
                logging.info(msg.format(fname))
            except Exception as e:
                msg = 'Failed to upload {0} to Cloud Storage: {1}'
                logging.error(msg.format(fname, e))
            else:
                # Update original photo entity
                photo_key = self.datastore.key(ds.DATASTORE_PHOTO, fname)
                photo_entity = self.datastore.get(photo_key)
                photo_entity.update({'processed': True})
                batch.put(photo_entity)

                # Create datastore entry for oriented image
                name, ext = os.path.splitext(fname)
                resized_fname = '{0}{1}'.format(name, ext)
                oriented_key = self.datastore.key(ds.DATASTORE_ORIENTED_IMAGE, resized_fname)
                oriented_entity = datastore.Entity(oriented_key)
                oriented_entity['original_photo'] = photo_key
                oriented_entity['image_type'] = unicode(ds.TOTALITY_IMAGE_TYPE)
                lat = photo_entity['lat']
                lon = photo_entity['lon']
                # TODO(dek): properly respect LatRef and LonRef here
                lon = -lon
                p = Point(lat, lon)
                np = self.eclipse_gis.interpolate_nearest_point_on_line(p)
                # TODO(dek):
                # map each location into its associated center point
                # (based on the golden data in eclipse_gis)
                # and sort by location/time bins
                oriented_entity[ds.TOTALITY_ORDERING_PROPERTY] = np
                batch.put(oriented_entity)

        # Cloud Datastore API request