python source code of googleconnector

resolwe-master
- .coveragerc
- LICENSE
- .readthedocs.yml
- .gitattributes
- README.rst
- .pylintrc
- setup.py
- resolwe
  - __about__.py
  - composer.py
  - storage
    - apps.py
    - cleanup.py
    - fixtures
      - storage_data.yaml
      - storage_processes.yaml
      - storage_users.yaml
    - models.py
    - views.py
    - manager.py
    - consumers.py
    - __init__.py
    - migrations
      - 0006_assign_referenced_paths.py
      - 0003_add_hash_fields_to_referenced_path.py
      - 0001_initial.py
      - 0005_referencedpath_storage_locations.py
      - __init__.py
      - 0004_calculate_hashes.py
      - 0002_create_filestorage_objects.py
      - 0007_remove_referencedpath_file_storage.py
    - tests
      - test_utils.py
      - test_listener.py
      - test_cleanup.py
      - storage_credentials_test_connectors.py
      - test_models.py
      - storage_credentials_test_transfer.py
      - test_circular_buffer.py
      - __init__.py
      - test_transfer.py
      - test_streamhasher.py
      - test_manager.py
    - settings.py
    - signals.py
    - connectors
      - transfer.py
      - exceptions.py
      - localconnector.py
      - s3connector.py
      - googleconnector.py
      - hasher.py
      - registry.py
      - baseconnector.py
      - __init__.py
      - utils.py
      - circular_buffer.py
    - management
      - commands
        runstoragecleanup.py
        __init__.py
        runstoragemanager.py
      - __init__.py
    - protocol.py
  - db
    - postgresql
      - __init__.py
      - base.py
    - __init__.py
  - test
    - testcases
      - api.py
      - __init__.py
      - process.py
      - setting_overrides.py
    - __init__.py
    - utils.py
    - tests
      - files
        example_file.txt
      - test_testing_framework.py
      - processes
        tests.yml
  - api_urls.py
  - elastic
    - apps.py
    - fields.py
    - utils
      - __init__.py
    - routers.py
    - composer.py
    - indices.py
    - viewsets.py
    - __init__.py
    - lookup.py
    - tests
      - test_index.py
      - test_viewsets.py
      - test_composer.py
      - __init__.py
      - test_app
        viewsets.py
        models.py
        __init__.py
        elastic_indexes.py
        migrations
        0001_initial.py
        __init__.py
    - mixins.py
    - signals.py
    - builder.py
    - management
      - commands
        elastic_purge.py
        elastic_mapping.py
        __init__.py
        elastic_index.py
      - __init__.py
    - dependencies.py
    - pagination.py
  - process
    - fields.py
    - descriptor.py
    - __main__.py
    - __init__.py
    - parser.py
    - tests
      - test_python_process.py
      - files
        testfile.txt
        testfile2.txt
      - __init__.py
      - workflows
        test_workflow.yml
      - processes
        python_json.py
        python_test.py
        python_data_by_slug.py
        tests.yml
      - descriptors
        tests.yml
    - runtime.py
  - toolkit
    - docker_images
      - upload-tab-file
        README.md
        Dockerfile
      - base
        re-import.sh
        Dockerfile.fedora-31
        Dockerfile.ubuntu-18.04
        README.md
        curlprogress.py
      - archiver
        README.md
        Dockerfile
    - __init__.py
    - tests
      - test_dirs.py
      - files
        file tab.1.xlsx
        file tab.txt
        file tab.xls
        file tab.tab.gz
        xls_file_tabular.tab.gz
        xlsx_file_tabular.tab.gz
        dir sym link.tar.gz
        csv_file_tabular.tab.gz
        dir bad format.tar.gz
        file binary
        tab_file_tabular.tab.gz
        compressed dir.tar.gz
        upload_file
        input_file.txt
        file tab.csv.gz
      - test_files.py
      - test_archiver.py
      - __init__.py
    - tools
      - parse_tabular_file.py
    - processes
      - archiver.yml
      - dirs.yml
      - files.yml
  - __init__.py
  - permissions
    - permissions.py
    - fixtures
      - permissions.yaml
      - readme.txt
      - data.yaml
      - processes.yaml
      - collections.yaml
      - users.yaml
    - shortcuts.py
    - filters.py
    - __init__.py
    - utils.py
    - tests
      - test_utils.py
      - test_collection.py
      - test_shortcuts.py
      - test_permissions.py
      - test_data.py
      - __init__.py
      - test_tool.py
    - loader.py
    - mixins.py
  - utils.py
  - test_helpers
    - test_runner.py
    - __init__.py
    - management
      - commands
        __init__.py
        show_profile.py
      - __init__.py
  - rest
    - fields.py
    - serializers.py
    - tests.py
    - __init__.py
    - projection.py
  - flow
    - apps.py
    - engine.py
    - execution_engines
      - exceptions.py
      - workflow
        __init__.py
      - bash
        __init__.py
      - python
        __init__.py
      - __init__.py
      - base.py
    - utils
      - exceptions.py
      - stats.py
      - __init__.py
      - iterators.py
      - docs
        autoprocess.py
        __init__.py
    - executors
      - null
        run.py
        prepare.py
        __init__.py
      - manager_commands.py
      - run.py
      - transfer.py
      - prepare.py
      - docker
        run.py
        prepare.py
        constants.py
        seccomp.py
        __init__.py
      - logger.py
      - global_settings.py
      - __main__.py
      - __init__.py
      - collect.py
      - local
        run.py
        prepare.py
        __init__.py
      - requirements.txt
      - protocol.py
    - managers
      - state.py
      - consumer.py
      - listener.py
      - workload_connectors
        local.py
        slurm.py
        __init__.py
        celery.py
        base.py
      - __init__.py
      - utils.py
      - dispatcher.py
      - protocol.py
    - views
      - data.py
      - relation.py
      - entity.py
      - descriptor.py
      - __init__.py
      - utils.py
      - storage.py
      - collection.py
      - mixins.py
      - process.py
    - exceptions.py
    - expression_engines
      - jinja
        filters.py
        __init__.py
      - exceptions.py
      - __init__.py
      - base.py
    - migration_ops.py
    - models
      - data.py
      - fields.py
      - secret.py
      - entity.py
      - functions.py
      - descriptor.py
      - __init__.py
      - utils.py
      - storage.py
      - collection.py
      - process.py
      - migrations.py
      - base.py
    - tasks.py
    - filters.py
    - __init__.py
    - migrations
      - 0023_process_entity_2.py
      - 0011_preserve_parents.py
      - 0022_process_entity_1.py
      - triggers_entity.sql
      - 0001_squashed_0030_change_slug_field.py
      - 0015_refactor_relations_1.py
      - 0028_add_data_location.py
      - 0041_remove_download_perm.py
      - triggers_data.sql
      - 0012_recreate_empty_parents.py
      - 0019_relation_type_cicharfield.py
      - 0036_add_m2o_fields.py
      - 0003_data_dependency_1.py
      - 0035_process_entity_new.py
      - 0013_migration_history.py
      - 0008_compute_size.py
      - 0037_migrate_m2o.py
      - 0005_data_dependency_3.py
      - 0002_set_process_owners.py
      - 0021_collection_tags.py
      - 0004_data_dependency_2.py
      - 0024_process_entity_3.py
      - 0032_add_collection_duplicate.py
      - 0040_remove_entity_descriptor_completed.py
      - 0025_entity_type.py
      - 0010_add_secret.py
      - 0039_entity_collection_cascade.py
      - 0030_add_data_duplicate.py
      - 0038_remove_m2m_fields.py
      - 0027_data_purged.py
      - 0044_datalocation_to_filestorage.py
      - 0020_process_category_default.py
      - 0007_data_size.py
      - 0031_add_entity_duplicate.py
      - 0043_full_text_search.py
      - 0016_refactor_relations_2.py
      - 0029_storage_m2m.py
      - 0009_make_size_mandatory.py
      - utils.sql
      - __init__.py
      - 0045_unreferenced_storages.py
      - 0042_delete_obsolete_perms.py
      - 0034_callable_defaults.py
      - 0006_add_total_size.py
      - 0017_refactor_relations_3.py
      - 0033_move_purged.py
      - 0018_process_is_active.py
      - triggers_collection.sql
      - 0026_data_scheduled.py
      - 0014_track_resources.py
    - finders.py
    - tests
      - test_utils.py
      - test_access_api.py
      - test_docs.py
      - fixtures
        relationtypes.yaml
      - test_fields.py
      - test_serializers.py
      - test_commands.py
      - fields_test_app
        models.py
        __init__.py
        migrations
        0001_initial.py
        __init__.py
      - files
        contents.rst
        conf.py
        processes
        test.yaml
        errors.txt
      - test_executors.py
      - test_descriptors.py
      - test_filtering.py
      - test_runtime.py
      - test_models.py
      - test_purge.py
      - test_expression_filters.py
      - test_env_vars.py
      - __init__.py
      - test_secrets.py
      - expression_filters.py
      - test_relations.py
      - test_transfer.py
      - test_tool.py
      - test_validation.py
      - processes
        spawned.yml
        wrong_defaults
        tests.yml
        first_version
        tests.yml
        workflow.yml
        secrets.yml
        second_version
        tests.yml
        tests.yml
      - test_api.py
      - test_ordering.py
      - descriptors
        tests.yml
      - test_stats.py
      - test_resources.py
      - test_manager.py
    - signals.py
    - serializers
      - data.py
      - fields.py
      - relation.py
      - contributor.py
      - entity.py
      - descriptor.py
      - __init__.py
      - storage.py
      - collection.py
      - process.py
      - base.py
    - static
      - flow
        descriptorSchema.json
        typeSchema.json
        fieldSchema.json
        processSchema.json
    - routing.py
    - management
      - commands
        runlistener.py
        cleantestdir.py
        list_docker_images.py
        register.py
        __init__.py
        collecttools.py
      - __init__.py
- setup.cfg
- .travis.yml
- tests
  - .env
  - celery_conf.py
  - .test_upload
    - README.rst
  - .test_runtime
    - README.rst
  - manage.py
  - testing_credentials.tgz.enc
  - urls.py
  - __init__.py
  - settings.py
  - .gitignore
  - docker-compose.yml
  - .test_data
    - README.rst
- pyproject.toml
- .gitignore
- docs
  - composer.rst
  - proc.rst
  - intro.rst
  - images
  - storage.rst
  - api.rst
  - overview.rst
  - flow.rst
  - CHANGELOG.rst
  - example
    - example
      - processes
        example.yml
        example_basic.yml
        template_py_process.py
        minimal.yml
        all_fields.yml
  - ref.rst
  - index.rst
  - conf.py
  - contributing.rst
- MANIFEST.in
- tox.ini

"""Google storage connector."""
import base64
import datetime
import mimetypes
import os
from contextlib import suppress
from pathlib import Path

from google.api_core.exceptions import NotFound
from google.cloud import storage

from .baseconnector import BaseStorageConnector, validate_url, validate_urls


class GoogleConnector(BaseStorageConnector):
    """Google Cloud Storage storage connector.

    The ``client`` and ``bucket`` attributes are not created in
    ``__init__``; they are created on first access through
    ``__getattr__`` / ``_initialize``.
    """

    REQUIRED_SETTINGS = ["bucket", "credentials"]

    def __init__(self, config: dict, name: str):
        """Initialize Google connector.

        :param config: connector settings. The ``bucket`` entry is read
            here.

        :param name: connector name, passed on to the base class.
        """
        super().__init__(config, name)
        self.bucket_name = config["bucket"]
        self.supported_hash = ["crc32c", "md5"]
        # Maps a hash type to the name of the blob attribute that holds it.
        self.hash_propery = {"md5": "md5_hash", "crc32c": "crc32c"}

    @validate_url
    def get_object_list(self, url):
        """Get a list of objects stored below the given URL.

        :param url: prefix under which the objects are listed.

        :returns: list of object paths (POSIX style) relative to the given
            URL.
        """
        # Make sure the prefix ends with a separator so only objects inside
        # the given "directory" are matched.
        url = os.path.join(url, "")
        return [
            Path(e.name).relative_to(url).as_posix()
            for e in self.bucket.list_blobs(prefix=url)
        ]

    def _initialize(self):
        """Perform initialization.

        Create the storage client from the service account file given in
        the ``credentials`` setting and retrieve the configured bucket.
        """
        # NOTE(review): self.config is presumably set by the base class
        # constructor - confirm.
        credentials = self.config["credentials"]
        self.client = storage.Client.from_service_account_json(credentials)
        self.bucket = self.client.get_bucket(self.bucket_name)

    def __getattr__(self, name):
        """Lazy initialize some attributes.

        Python calls this method only when the regular attribute lookup
        fails. For ``client`` and ``bucket`` the connector is initialized
        and the freshly stored attribute is returned.

        :raises AttributeError: when any other missing attribute is
            requested.
        """
        requires_initialization = ["client", "bucket"]
        if name not in requires_initialization:
            # Name the missing attribute so the error is actionable.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )

        self._initialize()
        return getattr(self, name)

    @validate_urls
    @validate_url
    def delete(self, url, urls):
        """Remove objects.

        :param url: base URL of the objects to remove.

        :param urls: paths of the objects to remove, joined onto the base
            URL.
        """
        # At most 1000 objects can be deleted at the same time.
        max_chunk_length = 1000
        for i in range(0, len(urls), max_chunk_length):
            next_chunk = urls[i : i + max_chunk_length]
            # A NotFound error raised while deleting the chunk is ignored.
            with suppress(NotFound):
                with self.client.batch():
                    for delete_url in next_chunk:
                        blob = self.bucket.blob(
                            os.fspath(self.base_path / url / delete_url)
                        )
                        blob.delete()

    @validate_url
    def push(self, stream, url, hash_type=None, data_hash=None):
        """Push data from the stream to the given URL.

        The content type is guessed from the URL.

        :param stream: file-like object to read the data from.

        :param url: URL of the object to create.

        :param hash_type: optional type of the given hash, must be one of
            the types in ``supported_hash``.

        :param data_hash: value stored in the blob attribute belonging to
            the given hash type before the upload starts.
        """
        url = os.fspath(url)
        mime_type = mimetypes.guess_type(url)[0]
        blob = self.bucket.blob(url)
        if hash_type is not None:
            assert (
                hash_type in self.supported_hash
            ), f"Unsupported hash type '{hash_type}'."
            prop = self.hash_propery[hash_type]
            # NOTE(review): get_hash base64-decodes this attribute, so the
            # value given here is presumably expected base64 encoded -
            # confirm against callers.
            setattr(blob, prop, data_hash)
        blob.upload_from_file(stream, content_type=mime_type)

    @validate_url
    def get(self, url, stream):
        """Get data from the given URL and write it into the given stream."""
        blob = self.bucket.blob(os.fspath(url))
        blob.download_to_file(stream)

    def _blob_hash(self, blob, hash_type):
        """Get the hash of the given type from the given blob.

        Hash types listed in ``hash_propery`` are read from the matching
        blob attribute, base64 decoded and returned hex encoded. All other
        hash types are read from the blob metadata.

        :raises KeyError: when the hash of the given type is not stored in
            the blob metadata.
        """
        if hash_type in self.hash_propery:
            prop = self.hash_propery[hash_type]
            return base64.b64decode(getattr(blob, prop)).hex()
        # Treat unset (None) metadata like empty metadata, as set_hashes
        # does, so a missing hash always surfaces as a KeyError.
        metadata = blob.metadata or {}
        return metadata[hash_type]

    @validate_url
    def get_hash(self, url, hash_type):
        """Get the hash of the given type for the given object.

        :returns: the hash or None when the object does not exist.

        :raises KeyError: when the hash of the given type is not stored in
            the object metadata.
        """
        blob = self.bucket.get_blob(os.fspath(url))
        if blob is None:
            return None
        # NOTE(review): the blob was just retrieved, so this extra call
        # looks redundant - confirm before removing.
        blob.update()
        return self._blob_hash(blob, hash_type)

    @validate_url
    def get_hashes(self, url, hash_types):
        """Get the hashes of the given types for the given object.

        :returns: dictionary mapping every requested hash type to its hash
            or None when the object does not exist.

        :raises KeyError: when a hash of the requested type is not stored
            in the object metadata.
        """
        blob = self.bucket.get_blob(os.fspath(url))
        if blob is None:
            return None
        # NOTE(review): the blob was just retrieved, so this extra call
        # looks redundant - confirm before removing.
        blob.update()
        return {
            hash_type: self._blob_hash(blob, hash_type) for hash_type in hash_types
        }

    @validate_url
    def set_hashes(self, url, hashes):
        """Set the hashes for the given object.

        The hashes are stored in the object metadata. Hash types listed in
        ``hash_propery`` are skipped.

        :param hashes: dictionary mapping hash type to hash value.
        """
        blob = self.bucket.get_blob(os.fspath(url))
        # NOTE(review): the blob is None when the object does not exist
        # (see get_hash), in which case the next line raises
        # AttributeError - confirm callers only pass existing objects.
        blob.update()
        meta = blob.metadata or dict()
        custom_hashes = {
            k: v for (k, v) in hashes.items() if k not in self.hash_propery
        }
        meta.update(custom_hashes)
        blob.metadata = meta
        blob.update()

    @validate_url
    def exists(self, url):
        """Get if the object at the given URL exists."""
        return storage.Blob(bucket=self.bucket, name=os.fspath(url)).exists()

    @property
    def base_path(self):
        """Get a base path for this connector."""
        return Path("")

    @validate_url
    def presigned_url(self, url, expiration=60, force_download=False):
        """Create a presigned URL.

        The URL is used to obtain temporary access to the object at the
        given URL using only returned URL.

        :param expiration: expiration time of the link (in seconds), default
            is one minute.

        :param force_download: force download.

        :returns: URL that can be used to access object or None.
        """
        content_disposition = "attachment" if force_download else "inline"
        query_parameters = {"response-content-disposition": content_disposition}
        blob = self.bucket.blob(os.fspath(url))
        response = blob.generate_signed_url(
            version="v4",
            expiration=datetime.timedelta(seconds=expiration),
            method="GET",
            virtual_hosted_style=True,
            query_parameters=query_parameters,
        )
        return response