python source code of annotation_database

HistomicsTK-master
- .circleci
  - config.yml
- sample_images
  - .gitignore
- codecov.yml
- LICENSE
- build_wheels.sh
- histomicstk
  - utils
    - del2.py
    - hessian.py
    - simple_mask.py
    - fit_poisson_mixture.py
    - compute_tile_foreground_fraction.py
    - convert_matrix_to_image.py
    - merge_colinear.py
    - gradient_diffusion.py
    - general_utils.py
    - exclude_nonfinite.py
    - __init__.py
    - tests
      - test_girder_convenience_utils.py
    - convert_image_to_matrix.py
    - girder_convenience_utils.py
    - sample_pixels.py
    - eigen.py
  - saliency
    - tissue_detection.py
    - cellularity_detection_superpixels.py
    - __init__.py
    - tests
      - test_saliency.py
    - cellularity_detection_thresholding.py
  - segmentation
    - rag_add_layer.py
    - rag_color.py
    - embed_boundaries.py
    - level_set
      - chan_vese.py
      - reg_edge.py
      - __init__.py
    - positive_pixel_count.py
    - rag.py
    - label
      - trace_object_boundaries.py
      - compact.py
      - shuffle.py
      - perimeter.py
      - area_open.py
      - dilate_xor.py
      - split.py
      - delete_border.py
      - condense.py
      - CMakeLists.txt
      - __init__.py
      - width_open.py
      - delete.py
      - _trace_object_boundaries_cython.pyx
    - __init__.py
    - nuclear
      - gvf_tracking.py
      - gaussian_voting.py
      - CMakeLists.txt
      - __init__.py
      - min_model.py
      - _max_clustering_cython.pyx
      - max_clustering.py
      - detect_nuclei_kofahi.py
  - cli
    - BackgroundIntensity
      - BackgroundIntensity.py
      - BackgroundIntensity.xml
      - __init__.py
    - slicer_cli_list.json
    - ComputeNucleiFeatures
      - ComputeNucleiFeatures.py
      - __init__.py
      - README.md
      - ComputeNucleiFeatures.xml
    - PositivePixelCount
      - PositivePixelCount.py
      - __init__.py
      - PositivePixelCount.xml
    - ColorDeconvolution
      - ColorDeconvolution.xml
      - ColorDeconvolution.py
      - __init__.py
    - SeparateStainsXuSnmf
      - SeparateStainsXuSnmf.xml
      - SeparateStainsXuSnmf.py
      - __init__.py
    - NucleiDetection
      - NucleiDetection.xml
      - __init__.py
      - NucleiDetection.py
    - docker-entrypoint.sh
    - __init__.py
    - utils.py
    - SeparateStainsMacenkoPCA
      - SeparateStainsMacenkoPCA.py
      - SeparateStainsMacenkoPCA.xml
      - __init__.py
    - NucleiClassification
      - NucleiClassification.xml
      - __init__.py
      - NucleiClassification.py
  - filters
    - shape
      - vesselness.py
      - clog.py
      - cdog.py
      - __init__.py
      - glog.py
    - __init__.py
    - edge
      - gaussian_grad.py
      - __init__.py
  - __init__.py
  - workflows
    - specific_workflows.py
    - workflow_runner.py
    - __init__.py
    - tests
      - test_workflow_runner.py
  - features
    - _compute_marginal_glcm_probs_cython.pyx
    - compute_global_cell_graph_features.py
    - compute_nuclei_features.py
    - compute_gradient_features.py
    - graycomatrixext.py
    - compute_fsd_features.py
    - CMakeLists.txt
    - __init__.py
    - compute_intensity_features.py
    - compute_haralick_features.py
    - compute_morphometry_features.py
  - preprocessing
    - augmentation
      - color_augmentation.py
      - __init__.py
    - color_normalization
      - deconvolution_based_normalization.py
      - reinhard_stats.py
      - background_intensity.py
      - reinhard.py
      - __init__.py
    - color_deconvolution
      - complement_stain_matrix.py
      - rgb_separate_stains_macenko_pca.py
      - stain_color_map.py
      - separate_stains_macenko_pca.py
      - rgb_separate_stains_xu_snmf.py
      - separate_stains_xu_snmf.py
      - color_deconvolution.py
      - __init__.py
      - find_stain_index.py
      - _linalg.py
      - color_convolution.py
    - __init__.py
    - tests
      - test_normalization_and_augmentation.py
    - color_conversion
      - rgb_to_hsi.py
      - sda_to_rgb.py
      - od_to_rgb.py
      - rgb_to_od.py
      - lab_mean_std.py
      - rgb_to_sda.py
      - lab_to_rgb.py
      - __init__.py
      - rgb_to_lab.py
  - annotations_and_masks
    - polygon_merger.py
    - annotations_to_masks_handler.py
    - masks_to_annotations_handler.py
    - annotation_database_parser.py
    - annotation_and_mask_utils.py
    - __init__.py
    - review_gallery.py
    - polygon_merger_v2.py
    - annotations_to_object_mask_handler.py
    - tests
      - test_review_gallery.py
      - test_annotation_and_mask_utils.py
      - test_annotations_to_object_mask_handler.py
      - test_polygon_merger.py
      - test_annotation_database_parser.py
      - test_annotations_to_masks_handler.py
      - test_masks_to_annotations_handler.py
    - pyrtree
      - __init__.py
      - rect.py
      - rtree.py
- README.rst
- setup.py
- requirements-dev.txt
- AUTHORS.rst
- CMakeLists.txt
- .travis.yml
- CONTRIBUTING.rst
- tests
  - disable_test_cli_results.py
  - test_blob_detection_filters.py
  - test_files
    - Easy1_nuclei_gradient_features.csv
    - Easy1_nuclei_haralick_features.csv
    - sample_GTcodes.csv
    - Easy1_nuclei_intensity_features.csv
    - Easy1_nuclei_fsd_features.csv
    - saliency_GTcodes.csv
    - annotations_and_masks
      - img
        - polygon_merger_unmerged.JPG
        - polygon_merger_merged.JPG
      - polygon_merger_roi_masks
    - Easy1_nuclei_morphometry_features.csv
  - test_color_deconvolution.py
  - test_cli_common.py
  - test_global_cell_graph_features.py
  - htk_test_utilities.py
  - test_color_normalization.py
  - data
    - L1.png.sha512
    - TCGA-A2-A0YE-01Z-00-DX1.8A2E3094-5755-42BC-969D-7F0A2ECA0F39.svs.sha512
    - sample_svs_image.TCGA-DU-6399-01A-01-TS1.e8eb65de-d63e-42db-af6f-14fefbbdf7bd.svs.sha512
    - TCGA-06-0129-01Z-00-DX3.bae772ea-dd36-47ec-8185-761989be3cc8.svs.sha512
    - Easy1_nuclei_stain.npz.sha512
    - Easy1_cdog_sigma_max.npz.sha512
    - TCGA-06-0129-01Z-00-DX3_fgnd_mask_lres.png.sha512
    - TCGA-06-0129-01Z-00-DX3_roi_nuclei_boundary.anot.sha512
    - Easy1_cdog_max.npz.sha512
    - TCGA-06-0129-01Z-00-DX3_roi_nuclei_bbox.anot.sha512
    - TCGA-A2-A0YE-01Z-00-DX1.8A2E3094-5755-42BC-969D-7F0A2ECA0F39.svs_annotations.json.sha512
    - Easy1_nuclei_fgnd_mask.npz.sha512
    - Easy1_clog_sigma_max.npz.sha512
    - Easy1_clog_max.npz.sha512
    - TCGA-A2-A0YE-01Z-00-DX1_GET_MergePolygons.svs_annotations.json.sha512
    - Easy1_nuclei_seg_kofahi.npy.sha512
    - Easy1.png.sha512
  - test_feature_extraction.py
  - __init__.py
  - test_nuclei_segmentation.py
  - test_glcm.py
  - Dockerfile-gc-tests
  - docker_setup.py
  - .dockerignore
  - docker-compose.yml
  - test_segmentation_label.py
  - test_color_conversion.py
- test_wheels.py
- pyproject.toml
- Dockerfile
- Dockerfile-wheels
- .gitignore
- docs
  - histomicstk.preprocessing.color_deconvolution.rst
  - histomicstk.filters.shape.rst
  - histomicstk.annotations_and_masks.masks_to_annotations_handler.rst
  - histomicstk.saliency.cellularity_detection_superpixels.rst
  - Makefile
  - make.bat
  - histomicstk.rst
  - histomicstk.utils.rst
  - examples
    - introducing_the_girder_api.rst
    - polygon_merger_using_rtree.ipynb
    - doc_files
    - segmentation_masks_to_annotations.ipynb
    - tips_for_scalable_annotation_rendering.rst
    - annotation_database_backup_and_sql_parser.ipynb
    - procedure_for_typical_annotation_project.rst
    - semantic_segmentation_superpixel_approach.ipynb
    - semantic_segmentation_color_thresholding_approach.ipynb
    - polygon_merger_from_tiled_masks.ipynb
    - workflows.ipynb
  - histomicstk.filters.edge.rst
  - histomicstk.preprocessing.color_conversion.rst
  - histomicstk.segmentation.level_set.rst
  - histomicstk.segmentation.nuclear.rst
  - histomicstk.segmentation.label.rst
  - histomicstk.segmentation.rst
  - histomicstk.workflows.rst
  - histomicstk.annotations_and_masks.annotation_and_mask_utils.rst
  - histomicstk.preprocessing.color_normalization.rst
  - histomicstk.preprocessing.augmentation.rst
  - examples.rst
  - histomicstk.workflows.workflow_runner.rst
  - histomicstk.preprocessing.rst
  - make_docs.sh
  - histomicstk.annotations_and_masks.annotations_to_object_segmentation_masks.rst
  - histomicstk.annotations_and_masks.review_gallery.rst
  - histomicstk.saliency.cellularity_detection_thresholding.rst
  - authors.rst
  - index.rst
  - conf.py
  - histomicstk.annotations_and_masks.annotations_to_semantic_segmentation_masks.rst
  - histomicstk.workflows.specific_workflows.rst
  - histomicstk.annotations_and_masks.annotation_database_parser.rst
  - api-docs.rst
  - histomicstk.annotations_and_masks.polygon_merger.rst
  - histomicstk.features.rst
  - histomicstk.filters.rst
  - histomicstk.segmentation.positive_pixel_count.rst
  - contributing.rst
  - installation.rst
  - histomicstk.saliency.rst
  - histomicstk.annotations_and_masks.rst
  - histomicstk.saliency.tissue_detection.rst
- .dockerignore
- tox.ini

# -*- coding: utf-8 -*-
"""
Created on Thu Dec 12 13:19:18 2019

@author: tageldim
"""
import os
import json
import copy
from pandas import DataFrame
from sqlalchemy import create_engine
from sqlalchemy.types import Integer, String, Boolean
from histomicstk.workflows.workflow_runner import (
    Workflow_runner, Slide_iterator)
from histomicstk.utils.girder_convenience_utils import (
    get_absolute_girder_folderpath)
from histomicstk.annotations_and_masks.annotation_and_mask_utils import (
    parse_slide_annotations_into_tables)


# %%===========================================================================
# Helper functions


def _add_item_to_sqlite(dbcon, item):
    """Append one girder item record to the ``items`` sqlite table.

    Parameters
    ----------
    dbcon : sqlalchemy connection
        passed to pandas ``DataFrame.to_sql`` as ``con``

    item : dict
        girder response with item information. It is not modified.

    """
    item_info_dtypes = {
        '_id': String(),
        '_modelType': String(),
        'baseParentId': String(),
        'baseParentType': String(),
        'copyOfItem': String(),
        'created': String(),
        'creatorId': String(),
        'description': String(),
        'folderId': String(),
        'largeImage': String(),
        'name': String(),
        'size': Integer(),
        'updated': String(),
    }

    # in case anything is not in the schema, drop it. Building a fresh dict
    # leaves the caller's item untouched without deep-copying all of it.
    item_info = {
        k: v for k, v in item.items() if k in item_info_dtypes}

    # largeImage goes into a String column, so store its string form.
    # Items without a large image get a missing value instead of raising
    # KeyError, which used to abort the backup of that item.
    if 'largeImage' in item:
        item_info['largeImage'] = str(item['largeImage'])
    else:
        item_info['largeImage'] = None

    # convert to a single-row df and add to items table
    item_info_df = DataFrame.from_dict(item_info, orient='index').T
    item_info_df.to_sql(
        name='items', con=dbcon, if_exists='append',
        dtype=item_info_dtypes, index=False)


def _add_folder_to_sqlite(dbcon, folder_info):
    """Append one girder folder record to the ``folders`` sqlite table.

    Only the keys listed in the column schema below are written; any other
    key present in ``folder_info`` is left out. ``folder_info`` itself is
    not modified.

    Parameters
    ----------
    dbcon : sqlalchemy connection
        passed to pandas ``DataFrame.to_sql`` as ``con``

    folder_info : dict
        folder information, keyed by field name

    """
    schema = {
        '_accessLevel': Integer(),
        '_id': String(),
        '_modelType': String(),
        'baseParentId': String(),
        'baseParentType': String(),
        'created': String(),
        'creatorId': String(),
        'description': String(),
        'name': String(),
        'parentCollection': String(),
        'parentId': String(),
        'public': Boolean(),
        'size': Integer(),
        'updated': String(),
        'folder_path': String(),
    }

    # keep only the fields that have a column in the schema
    record = {}
    for field, value in folder_info.items():
        if field in schema:
            record[field] = value

    # keys become the index first; transposing turns them into the columns
    # of a single-row frame, which is appended to the folders table
    row = DataFrame.from_dict(record, orient='index').transpose()
    row.to_sql(
        name='folders', con=dbcon, if_exists='append',
        dtype=schema, index=False)


def _add_annotation_docs_to_sqlite(dbcon, annotation_docs, item):
    """Append annotation documents to the ``annotation_docs`` sqlite table.

    Parameters
    ----------
    dbcon : sqlalchemy connection
        passed to pandas ``DataFrame.to_sql`` as ``con``

    annotation_docs : pandas DataFrame
        one row per annotation document. An ``item_name`` column is added
        to this frame in place.

    item : dict
        girder response with item information; only ``name`` is read

    """
    column_types = {
        'annotation_girder_id': String(),
        '_modelType': String(),
        '_version': Integer(),
        'itemId': String(),
        'item_name': String(),
        'created': String(),
        'creatorId': String(),
        'public': Boolean(),
        'updated': String(),
        'updatedId': String(),
        'groups': String(),
        'element_count': Integer(),
        'element_details': Integer(),
    }

    # tag every row with the item name for convenience
    annotation_docs.loc[:, "item_name"] = item['name']

    # save table to sqlite
    annotation_docs.to_sql(
        name='annotation_docs',
        con=dbcon,
        if_exists='append',
        dtype=column_types,
        index=False,
    )


def _add_annotation_elements_to_sqlite(dbcon, annotation_elements):
    """Append annotation elements to the ``annotation_elements`` sqlite table.

    Parameters
    ----------
    dbcon : sqlalchemy connection
        passed to pandas ``DataFrame.to_sql`` as ``con``

    annotation_elements : pandas DataFrame
        one row per annotation element. The ``annidx`` and ``elementidx``
        columns are removed from this frame in place.

    """
    column_types = {
        'annotation_girder_id': String(),
        'element_girder_id': String(),
        'type': String(),
        'group': String(),
        'label': String(),
        'color': String(),
        'xmin': Integer(),
        'xmax': Integer(),
        'ymin': Integer(),
        'ymax': Integer(),
        'bbox_area': Integer(),
        'coords_x': String(),
        'coords_y': String(),
    }

    # The positional indices relative to the JSON are pretty arbitrary and
    # would change if the same girder client fetched the annotations twice;
    # the girder ID strings are what really identify a record.
    annotation_elements.drop(
        columns=['annidx', 'elementidx'], inplace=True)

    annotation_elements.to_sql(
        name='annotation_elements',
        con=dbcon,
        if_exists='append',
        dtype=column_types,
        index=False,
    )


def parse_annotations_to_local_tables(
        item, annotations, local, monitorPrefix='',
        save_csv=True, save_sqlite=False, dbcon=None):
    """Tabulate the annotations of one slide and save the tables.

    The annotations are always parsed with histomicstk.annotations_and_masks.
    annotation_and_mask_utils.parse_slide_annotations_into_tables(), which
    returns two tables: one for the annotation documents and one for the
    annotation elements. The flags below only control where they are saved.

    Parameters
    ----------
    item : dict
        girder response with item information

    annotations : dict
        loaded annotations

    local : str
        local directory

    monitorPrefix : str
        text to prepend to printed statements

    save_csv : bool
        whether to write the two tables as csv files, named after the item
        with the suffixes ``_docs.csv`` and ``_elements.csv``, inside the
        local directory.

    save_sqlite : bool
        whether to append the two tables to the sqlite database

    dbcon : sqlalchemy.create_engine.connect() object
        IGNORE THIS PARAMETER!! This is used internally. It must not be
        None when save_sqlite is True.

    """
    print("%s: parse to tables" % monitorPrefix)
    csv_prefix = os.path.join(local, item['name'])
    docs_table, elements_table = (
        parse_slide_annotations_into_tables(annotations))

    if save_csv:
        docs_table.to_csv(csv_prefix + '_docs.csv')
        elements_table.to_csv(csv_prefix + '_elements.csv')

    if not save_sqlite:
        return

    assert dbcon is not None, "You must connect to database first!"
    _add_annotation_docs_to_sqlite(dbcon, docs_table, item)
    _add_annotation_elements_to_sqlite(dbcon, elements_table)

# %%===========================================================================
# Workflow at a single slide level


def dump_annotations_workflow(
        gc, slide_id, local, monitorPrefix='',
        save_json=True, save_sqlite=False, dbcon=None,
        callback=None, callback_kwargs=None):
    """Dump annotations for single slide into the local folder.

    Any exception raised while handling the slide is caught and reported
    on stdout (with the monitor prefix and the exception type) instead of
    being propagated, so one failing slide does not abort the caller.

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated girder client instance

    slide_id : str
        girder id of item (slide)

    local : str
        local path to dump annotations

    monitorPrefix : str
        prefix to monitor string

    save_json : bool
        whether to dump item information and annotations as json files

    save_sqlite : bool
        whether to save the backup into an sqlite database

    dbcon : sqlalchemy.create_engine.connect() object
        IGNORE THIS PARAMETER!! This is used internally.

    callback : function
        function to call that takes in AT LEAST the following params
        - item: girder response with item information
        - annotations: loaded annotations
        - local: local directory
        - monitorPrefix: string
        - dbcon: sqlalchemy.create_engine.connect() object
        It is only called when annotations were returned for the item.

    callback_kwargs : dict
        kwargs to pass along to callback

    """
    callback_kwargs = callback_kwargs or {}
    try:
        item = gc.get('/item/%s' % slide_id)

        savepath_base = os.path.join(local, item['name'])

        # dump item information json
        if save_json:
            print("%s: save item info" % monitorPrefix)
            with open(savepath_base + '.json', 'w') as fout:
                json.dump(item, fout)

        # save item info to sqlite
        if save_sqlite:
            _add_item_to_sqlite(dbcon, item)

        # pull annotation
        print("%s: load annotations" % monitorPrefix)
        annotations = gc.get('/annotation/item/' + item['_id'])

        if annotations is not None:

            # dump annotations to JSON in local folder
            if save_json:
                print("%s: save annotations" % monitorPrefix)
                with open(savepath_base + '_annotations.json', 'w') as fout:
                    json.dump(annotations, fout)

            # run callback
            if callback is not None:
                print("%s: run callback" % monitorPrefix)
                callback(
                    item=item, annotations=annotations, local=local,
                    dbcon=dbcon, monitorPrefix=monitorPrefix,
                    **callback_kwargs)

    except Exception as e:
        # Deliberately best-effort: report and move on. The prefix and the
        # exception type are included because str(e) alone can be
        # uninformative (e.g. a KeyError prints only the quoted key).
        print("%s: ERROR: %s: %s" % (monitorPrefix, type(e).__name__, e))

# %%===========================================================================
# Main method


def dump_annotations_locally(
        gc, folderid, local, save_json=True,
        save_sqlite=False, dbcon=None,
        callback=None, callback_kwargs=None):
    """Dump annotations of folder and subfolders locally recursively.

    This reproduces this tiered structure locally and (possibly) dumps
    annotations there. Adapted from Lee A.D. Cooper

    Parameters
    ----------
    gc : girder_client.GirderClient
        authenticated girder client instance

    folderid : str
        girder id of source (base) folder

    local : str
        local path to dump annotations

    save_json : bool
        whether to dump annotations as json file

    save_sqlite : bool
        whether to save the backup into an sqlite database. The database
        file is created inside ``local`` and named after the base folder.

    dbcon : sqlalchemy.create_engine.connect() object
        IGNORE THIS PARAMETER!! This is used internally. When it is None
        and save_sqlite is True, this call opens the connection and closes
        it again before returning; a connection passed in is left open.

    callback : function
        function to call that CAN accept AT LEAST the following params
        - item: girder response with item information
        - annotations: loaded annotations
        - local: local directory
        - monitorPrefix: string
        - dbcon: sqlalchemy.create_engine.connect() object
        You can just add kwargs at the end of your callback definition
        for simplicity.

    callback_kwargs : dict
        kwargs to pass along to callback. DO NOT pass any of the parameters
        item, annotations, local, monitorPrefix, or dbcon as these will be
        internally passed. Just include any specific parameters for the
        callback. See parse_annotations_to_local_tables() above for
        an example of a callback and the unit test of this function.

    """
    callback_kwargs = callback_kwargs or {}
    assert(save_json or save_sqlite), "must save results somehow!"
    monitor = os.path.basename(local)

    # get folder info
    folder_info = gc.get("folder/%s" % folderid)
    folder_info['folder_path'] = get_absolute_girder_folderpath(
        gc=gc, folder_info=folder_info)

    # connect to sqlite database -- only first stack does this, and the
    # stack that opened the connection is the one that closes it below
    sql_engine = None
    if save_sqlite and (dbcon is None):
        db_path = os.path.join(local, folder_info['name'] + ".sqlite")
        sql_engine = create_engine('sqlite:///' + db_path, echo=False)
        dbcon = sql_engine.connect()

    try:
        # save folder information json
        if save_json:
            print("%s: save folder info" % monitor)
            savepath = os.path.join(local, folder_info['name'] + '.json')
            with open(savepath, 'w') as fout:
                json.dump(folder_info, fout)

        # save folder info to sqlite
        if save_sqlite:
            _add_folder_to_sqlite(dbcon, folder_info)

        # pull annotations for each slide in folder
        workflow_runner = Workflow_runner(
            slide_iterator=Slide_iterator(
                gc, source_folder_id=folderid,
                keep_slides=None,
            ),
            workflow=dump_annotations_workflow,
            workflow_kwargs={
                'gc': gc,
                'local': local,
                'save_json': save_json,
                'save_sqlite': save_sqlite,
                'dbcon': dbcon,
                'callback': callback,
                'callback_kwargs': callback_kwargs,
            },
            monitorPrefix=monitor)

        workflow_runner.run()

        # for each subfolder, create a new folder locally and call self
        for folder in gc.listFolder(parentId=folderid):

            # create folder in local; tolerate one left by an earlier run
            # so that dumping into the same directory again does not abort
            new_folder = os.path.join(local, folder['name'])
            if not os.path.isdir(new_folder):
                os.mkdir(new_folder)

            # call self with same parameters
            dump_annotations_locally(
                gc=gc, folderid=folder['_id'], local=new_folder,
                save_json=save_json, save_sqlite=save_sqlite, dbcon=dbcon,
                callback=callback, callback_kwargs=callback_kwargs)

    finally:
        # release the database only if this call created it; recursive
        # calls receive dbcon from their parent and must leave it open
        if sql_engine is not None:
            dbcon.close()
            sql_engine.dispose()

# %%===========================================================================