Python source code of STARROrderMedConversion

CDSS-master
- TestCDSS.py
- setup
  - stride
    - rxnorm
      - test
        TestRxNormClient.py
      - RxNormClient.py
      - __init__.py
    - psql
      - indices
        stride_orderset_order_proc.indices.sql
        build_indices.sh
        stride_patient_encounter.indices.sql
        stride_medication_mpi.indices.sql
        stride_order_results.indices.sql
        stride_orderset_order_med.indices.sql
        stride_patient.indices.sql
        stride_flowsheet.indices.sql
        stride_adt.indices.sql
        stride_note.indices.sql
        stride_order_med.indices.sql
        stride_chargemaster.indices.sql
        stride_order_proc.indices.sql
      - dump_stride.sh
      - schemata
        stride_icd10_cm.schema.sql
        stride_adt.schema.sql
        stride_patient.schema.sql
        stride_io_flowsheet.schema.sql
        stride_income.schema.sql
        stride_treatment_team.schema.sql
        stride_insurance.schema.sql
        stride_medication_mpi.schema.sql
        stride_orderset_order_med.schema.sql
        stride_mapped_meds.schema.sql
        stride_chargemaster.schema.sql
        stride_order_proc.schema.sql
        stride_note.schema.sql
        stride_dx_list.schema.sql
        stride_admit.schema.sql
        stride_drg.schema.sql
        stride_admit_vital.schema.sql
        stride_order_medmixinfo.schema.sql
        stride_icd9_cm.schema.sql
        stride_orderset_order_proc.schema.sql
        stride_culture_micro.schema.sql
        stride_patient_encounter.schema.sql
        stride_order_med.schema.sql
        stride_flowsheet.schema.sql
        stride_preadmit_med.schema.sql
        stride_order_results.schema.sql
      - restore_stride.sh
    - box
      - test
        TestBoxClient.py
        __init__.py
        box-upload-test.txt
        box-test-verify
        box-test-subdirectory
        box-upload-test.txt
        box-download-test.txt
        box-download-test.txt
        box-download-test.txt
      - __init__.py
      - box-API.md
      - BoxClient.py
    - stride.md
    - clinical_item
      - clinical_item_tables.md
      - psql
        indices
        item_collection_item.indices.sql
        build_indices.sh
        patient_item.indices.sql
        clinical_item_association.indices.sql
        clinical_item.indices.sql
        clinical_item_category.indices.sql
        patient_item_collection_link.indices.sql
        restore-clinical_item.md
        dump_clinical_item.sh
        schemata
        backup_link_patient_item.schema.sql
        clinical_item_category.schema.sql
        item_collection_item.schema.sql
        item_collection.schema.sql
        clinical_item_association.schema.sql
        clinical_item_link.schema.sql
        data_cache.schema.sql
        clinical_item.schema.sql
        collection_type.schema.sql
        patient_item.schema.sql
        order_result_stat.schema.sql
        patient_item_collection_link.schema.sql
        restore_clinical_item.sh
      - __init__.py
      - ClinicalItemDataLoader.py
    - __init__.py
    - core
      - test
        test_clean_data_file.csv
        test_raw_data_file.csv
        __init__.py
        TestStrideLoader.py
      - StrideLoader.py
      - __init__.py
      - StrideLoaderParams.py
  - installPythonLibraries.py
  - webAppSetupNotes.txt
  - windows.profile.bat
  - setup.sh
- LocalEnv.py.template
- LICENSE
- README.md
- scripts
  - OrdersetExpectedVsReality
    - Data
    - Code
      - STEP2A_count_co_occurring_orders_not_in_orderset.py
      - STEP2C_compute_median_ordering_prob_cutoffs.py
      - STEP2B_compute_co_occurring_statistics.py
      - STEP1B_define_orders_in_ordersets.py
      - STEP3C_filter_a_la_carte_orders.py
      - STEP1A_get_orderset_usage_instances.py
      - STEP3B_count_co_occurring_orders_already_in_orderset.py
      - STEP2D_filter_co_occurring_orders.py
      - STEP3A_find_order_ids_associated_with_ordersets.py
  - TSI_prediction
    - TSI_Prediction_scripts.sql
  - Moore_Stroke
    - Vitals.sql
    - ExampleAccessLogJoin.sql
    - HistoryOfAfib.sql
    - Demographics.sql
    - Labels.sql
    - Aspirin.sql
  - LabCulturePrediction
    - MRSAPredictionPipeline.py
    - SusceptibilityPredictionPipeline.py
    - LabCultureMatrix.py
    - PersonalizedAntibiogramPredictionPipeline.py
    - MRSAMatrix.py
    - Susceptibility_Feature_Names.csv
    - AntiBiogramMatrix.py
    - __init__.py
    - README.md
    - LabCulturePredictionPipeline.py
    - PersonalizedAntibiogramMatrix.py
    - DecisionMakingClass
      - batch_driver.sh
      - cost_analysis.py
      - label_generation.py
      - UtilityModel.py
      - feature_engineering.py
      - cohort_queries.py
      - SQL
        MedFeatures.sql
        LabFeatures.sql
        DemographicFeatures.sql
        ProcedureFeatures.sql
        MicrobiologyOrderFeatures.sql
        DxFeatures.sql
        ImagingFeatures.sql
      - predictive_models.py
      - run_experiments.py
    - AntiBiogramPredictionPipeline.py
  - DevWorkshop
    - ReadMe.PHSDataAccess.txt
    - ReadMe.SQLQuery.txt
    - ReadMe.GoogleCloudDevEnvironment.txt
    - sampleData
      - abstractLines.txt
      - abstracts.xml
      - abstracts.parser.py
    - ReadMe.AWSDevEnvironment.txt
    - UnitTesting
      - UnitTesting.R
      - TestApplicationModule.py
      - __init__.py
      - ApplicationModule.py
    - AWSDevEnvironment
      - batchDriver.py
      - ExampleQueryApp.py
      - batchDriver.sh
    - screening
      - sqlQuery
        TestSQLQueryExample.py
        SQLQueryExample.py
      - parsing
        ParsingExample.py
        TestParsingExample.py
      - associationStats
        AssociationStatsExample.py
        PartD_Prescriber_PUF_NPI_DRUG_15.sample.zip
        TestAssociationStatsExample.py
    - STARR-OMOP
      - StarterNotes.txt
      - WorkshopNotes.03.txt
      - WorkshopNotes.01.txt
      - WorkshopNotes.02.txt
    - ReadMe.GoogleCloud-BigQuery-VPC.txt
    - ReadMe.TestFirstDev.txt
    - GoogleCloudPlatform
      - results
        placeholder.txt
      - log
        placeholder.txt
      - batchDriver.py
      - ExampleQueryApp.py
      - batchDriver.sh
      - moreExamples
        auth_quiver.py
        auth_cloud.py
        readme.md
        requirements.txt
        sleep_loop.py
        batch
        cloud_read.py
        cloud_log.sh
        generateDriverScript.py
        cloud_write.py
        cloud_driver.sh
    - statsSimCalc
      - powerCalculation.R
      - powerCalculation.examples.R
    - ReadMe.PivotCharts.txt
    - ReadMe.PowerCalculationSimulation.txt
    - DevWorkshopR
      - unit_testing.R
      - 02_unit_testing.R
      - markup_python_R.Rmd
      - command_line02.R
      - mtcars2.csv
      - unit_testing_a.R
      - unit_testing_b.R
      - readme.md
      - command_line01.R
      - unit_testing_c.R
  - GoogleCloud
    - Sheets
      - test_sheets_to_df.py
      - sheets_to_df.py
    - specialty_referral
      - nested_referral_script.R
      - cardiology_referral_script.R
      - gastroenterology_referral_script.R
      - gastroenterology_referral.sql
      - cardiology_referral.sql
      - readme.md
    - StrokeCohort
      - stroke_aggregate.R
      - readme.md
      - ministudy.sql
    - __init__.py
    - readme.md
    - BQ
      - lpch
        upload_lpch_procedure.py
        preprocess_flowsheet.py
        directory_flowsheet_setup.sh
        upload_lpch_demographics.py
        upload_lpch_path_report.py
        upload_template.py
        upload_lpch_mar.py
        upload_lpch_order_med.py
        preprocess_upload_flowsheet.py
        date_utc_change.sql
        upload_lpch_diagnosis.py
        upload_lpch_ndc_code.py
        upload_lpch_drg_code.py
        upload_lpch_clinical_note_meta.py
        upload_lpch_radiology_meta.py
        upload_new_date_column.py
        upload_lpch_lab_result.py
      - upload_alert_history.py
      - upload_alt_drug_allergy.py
      - google_auth.R
      - load_stride_tables.py
      - BigQueryConnect_py2.py
      - ahrq
        create_ahrq.sh
        upload_ahrq.py
        create_ahrq_table.py
        ahrq_citation.txt
        upload_ahrq_diag.py
      - test
        check_row_counts.py
        testBigQueryConnect.py
        testSTARRDemo.py
      - auth_cloud.py
      - BigQueryConnect.py
      - upload_alt_com_action.py
      - alert_upload.py
      - __init__.py
      - upload_alerts_orders.py
      - readme.md
      - requirements.txt
      - shc
        preprocess_shc_order_proc.py
        directory_shc_setup.sh
        upload_shc_order_proc.py
      - upload_culture_sensitivity.py
    - cloud_sdk
      - readme.md
  - Archive
    - PressGaney
      - Step.20.LogisticRegressionFormatOutput.R
      - Step.10.DataLoadingRegression.R
    - ClinicNet
      - utils
        summarize.py
        pca.py
      - data_processing
        compute_stats.py
        make_order_set_responses.py
        add_dates.py
        stratify_data_by_time.py
        prep_batches.py
        feature_selection.py
        make_batches.py
      - bootstrap.py
      - clinicnet_model
        utils
        datagenerator.py
        generator_cache.ipynb
      - README.md
      - preprocessing
        data_split.sh
        make_hdf5.py
      - run_clinicnet.py
    - ResidentRotationEHRUsage
      - README
      - Data
        ehr_action_categories.csv
        rotations.csv
        ehr_action_descriptions.tsv
        Medical_Student_EHR_Use_Provider_Level.xlsx
      - Code
        .RData
        .ipynb_checkpoints
        Daily EHR Time Spent Analysis Should We Filter-checkpoint.ipynb
        generate_usage_spreadsheet.py
        compute_number_ehr_actions_per_24hour_cycle.py
        .Rhistory
        compute_summary_stats.py
    - OpioidRx
      - extractionNotes.longitudinal.txt
      - problemListAssociations.txt
      - extractionNotes.txt
      - problemAssociations.py
      - extractData.py
      - logisticRegression.R
      - compositeAssociations.py
      - machineLearn.py
      - setupNotes.txt
      - dataNotes.txt
      - preprocessData.py
    - SimulationRecommender
      - RecommenderPipeline
        syncPipeline
        readme.md
        UnitTests
        oneClick.py
        tables_figures_part2.R
        unique_grading.R
        tracker_script_v5.py
        clinicalRecommenderGradingTests.py
        tracker_script.py
        clinicalRecommenderTests.py
        tracker_script_v4.py
        CPOETrackerAnalysis.py
        configuration.py
        readme.md
        helper.py
        tables_figures.R
        readme.md
      - simulateCases.py
      - interview_script.md
      - simDataReader.py
      - parser.R
      - simTest.md
      - readme.md
      - UserTestingNotes.txt
    - ICUDNR
      - simulateData.py
      - RAnalysis
        Step.40.CoxRegression.R
        timedep_cox.R
        Step.20.Demographics.R
        Step.30.Imputation-Amelia.R
        Example.FeatureMatrixProcessing.R
        DNR_aug.sas
        Step.10.DataLoading.R
      - summaryStats.py
      - extractData.py
      - formatData.py
    - HighMortalityVsLowMortalityVsCrowd
      - PropensityScoreMatching
        remap_ids_matching.py
        matching.Rmd
        extract_unmatched_patients.py
      - ExtractPatientFeatureMatrix
        extract_physician_patient_map.py
        extract_admission_discharge_flow.py
        extract_diagnosis_codes.py
        reorder_data.py
        clean_data.py
        extract_mortality.py
        extract_last_recorded_admission_diagnosis.py
        extract_age.py
        extract_treatment_team.py
        extract_encounter_data.py
      - BetterThanExpectedEvaluation
        extract_better_than_expected_patients.py
        extract_mortality_unbinned.py
        aggregate_evaluation_metrics.py
        Better_Than_Expected_Evaluation_Line_Plot.ipynb
        evaluate_association_models_recommended_items_against_verify_items.py
        evaluate_ordersets_against_verify_items.py
        prepare_query_and_verify_items.py
        join_probs_mortality_delta.py
      - README
      - Miscellaneous
        compute_observed_vs_expected_mortalityORreadmission.py
        extract_30_day_readmissions.py
        mortality_or_readmission_predictor.Rmd
        extract_lengths_of_stay.py
      - BuildExpectedMortalityPredictor
        remap_ids_mortality_probs.py
        mortality_predictor.Rmd
        compute_observed_vs_expected_mortality.py
    - SepsisICU
      - extractData.py
      - exampleDates.py
      - formatData.py
    - Ferritin
      - plotAssociations.py
      - plotDistributions.py
      - extractionNotes.txt
      - extractData.py
      - formatData.py
      - scratch.py
      - machineLearn.py
    - ExpertsVsEveryone
      - insertUpdates.sh
      - referenceData
        candidateOrders.tab
        item_collection_item.referenceOrders.update.tab
        scratchNotes.txt
        item_collection.update.tab
        generateWorksheetData.py
        item_collection_item.diagnosisLink.update.tab
        oldReferences.tab
        SetupReferenceData.txt
        oldReferenceConversion.py
        updateReferenceData.py
        orderSetQueries.txt
      - scripts
        README.txt
        Figures
        generate_plots.sh
        matching_parse.py
        plot_input.py
        Training
        get_predictions.py
        run_association.sh
        stratify.py
        Miscellananeous
        extract_unmatched_patient_cohorts.py
        extract_diagnosis_codes.py
        Matching
        matching.Rmd
        extractData_modified.py
        clean_data.py
        extract_age.py
        remap_ids.py
      - pipeline
        everyone_patients_matched_remapped_excluding_ordersets.csv
        README.txt
        everyone_patients_with_ordersets.csv
        Figures
        generate_plots.sh
        matching_parse.py
        plot_input.py
        Training
        get_predictions.py
        run_association.sh
        stratify.py
        Miscellananeous
        extract_unmatched_patient_cohorts.py
        extract_diagnosis_codes.py
        Matching
        matching.Rmd
        extractData_modified.py
        clean_data.py
        extract_age.py
        remap_ids.py
      - Matching_Function.Rmd
    - LabTestAnalysis
      - machine_learning
        extraction
        mapLabToResultCodes.py
        LabNormalityMatrix.py
        generateLabNormalityMatrices.py
        FeatureMatrixChangeLabels.py
        __init__.py
        LabChangeMatrix.py
        helpful_scripts
        look_RF_structure.py
        ml_utils.py
        pipeline.py
        utils_UCSF.py
        data_conversion
        map_UMich_panel_raw2code.csv
        map_component.csv
        map_panel.csv
        map_UCSF_panel_raw2code.csv
        map_panel_code2description.csv
        map_UCSF_component_raw2code.csv
        map_team.csv
        map_cormobidity.csv
        map_vitals.csv
        map_component_code2description.csv
        map_UMich_component_raw2code.csv
        prepareData_NonSTRIDE.py
        utils_UMich.py
        pipelineResults
        LABTSH_logistic_regression_c=1.0.eps
        LABTSH_random_forest_n_estimators=10,_max_depth=5.eps
        LABTSH_random_forest_n_estimators=10,_max_depth=15.eps
        LABTSH_random_forest_n_estimators=30,_max_depth=15.eps
        LABFER_random_forest_n_estimators=30,_max_depth=5.eps
        LABFER_random_forest_n_estimators=5,_max_depth=15.eps
        LABTSH_ada_boost_n_estimators=100,_learning_rate=0.01.eps
        LABSPLAC_ada_boost_n_estimators=10,_learning_rate=0.01.eps
        LABNTBNP_ada_boost_n_estimators=10,_learning_rate=0.01.eps
        LABNTBNP_random_forest_n_estimators=10,_max_depth=10.eps
        LABNTBNP_ada_boost_n_estimators=100,_learning_rate=0.01.eps
        LABFER_ada_boost_n_estimators=10,_learning_rate=1.eps
        LABSPLAC_ada_boost_n_estimators=100,_learning_rate=1.eps
        LABTSH_random_forest_n_estimators=5,_max_depth=15.eps
        LABSPLAC_random_forest_n_estimators=30,_max_depth=10.eps
        LABSPLAC_random_forest_n_estimators=30,_max_depth=15.eps
        LABFER_ada_boost_n_estimators=50,_learning_rate=1.eps
        LABTSH_random_forest_n_estimators=5,_max_depth=5.eps
        LABSPLAC_random_forest_n_estimators=10,_max_depth=5.eps
        LABNTBNP_random_forest_n_estimators=30,_max_depth=15.eps
        LABFER_decision_tree_max_depth=50.eps
        LABTSH_random_forest_n_estimators=10,_max_depth=10.eps
        LABTSH_logistic_regression_c=10.0.eps
        LABTSH_gaussian_naive_bayes.eps
        LABNTBNP_random_forest_n_estimators=10,_max_depth=15.eps
        topKPrecision.py
        LABNTBNP_random_forest_n_estimators=30,_max_depth=10.eps
        LABNTBNP_gaussian_naive_bayes.eps
        LABTSH_ada_boost_n_estimators=10,_learning_rate=1.eps
        LABSPLAC_random_forest_n_estimators=30,_max_depth=5.eps
        LABSPLAC_logistic_regression_c=10.0.eps
        LABSPLAC_random_forest_n_estimators=10,_max_depth=15.eps
        LABNTBNP_ada_boost_n_estimators=100,_learning_rate=1.eps
        LABNTBNP_logistic_regression_c=0.1.eps
        LABFER_random_forest_n_estimators=10,_max_depth=15.eps
        LABNTBNP_random_forest_n_estimators=5,_max_depth=10.eps
        LABNTBNP_ada_boost_n_estimators=10,_learning_rate=1.eps
        LABSPLAC_random_forest_n_estimators=5,_max_depth=10.eps
        LABSPLAC_decision_tree_max_depth=5.eps
        LABFER_random_forest_n_estimators=5,_max_depth=10.eps
        LABTSH_random_forest_n_estimators=30,_max_depth=10.eps
        LABFER_random_forest_n_estimators=10,_max_depth=10.eps
        LABSPLAC_logistic_regression_c=1.0.eps
        LABSPLAC_decision_tree_max_depth=20.eps
        LABSPLAC_logistic_regression_c=0.1.eps
        LABSPLAC_ada_boost_n_estimators=50,_learning_rate=1.eps
        LABSPLAC_random_forest_n_estimators=5,_max_depth=5.eps
        LABNTBNP_ada_boost_n_estimators=100,_learning_rate=0.1.eps
        LABFER_random_forest_n_estimators=30,_max_depth=15.eps
        LABTSH_logistic_regression_c=0.1.eps
        LABTSH_random_forest_n_estimators=30,_max_depth=5.eps
        LABFER_random_forest_n_estimators=10,_max_depth=5.eps
        LABSPLAC_random_forest_n_estimators=10,_max_depth=10.eps
        LABSPLAC_decision_tree_max_depth=50.eps
        accuracy_f1_scores.txt
        LABNTBNP_ada_boost_n_estimators=50,_learning_rate=0.01.eps
        LABFER_decision_tree_max_depth=20.eps
        LABSPLAC_random_forest_n_estimators=5,_max_depth=15.eps
        LABNTBNP_logistic_regression_c=10.0.eps
        LABNTBNP_random_forest_n_estimators=10,_max_depth=5.eps
        LABTSH_ada_boost_n_estimators=10,_learning_rate=0.1.eps
        LABNTBNP_random_forest_n_estimators=5,_max_depth=5.eps
        LABTSH_ada_boost_n_estimators=10,_learning_rate=0.01.eps
        LABSPLAC_ada_boost_n_estimators=100,_learning_rate=0.01.eps
        LABNTBNP_ada_boost_n_estimators=50,_learning_rate=1.eps
        LABNTBNP_ada_boost_n_estimators=50,_learning_rate=0.1.eps
        LABNTBNP_ada_boost_n_estimators=10,_learning_rate=0.1.eps
        LABNTBNP_decision_tree_max_depth=20.eps
        LABFER_ada_boost_n_estimators=100,_learning_rate=0.1.eps
        LABFER_ada_boost_n_estimators=100,_learning_rate=1.eps
        LABFER_ada_boost_n_estimators=50,_learning_rate=0.01.eps
        LABSPLAC_gaussian_naive_bayes.eps
        LABTSH_decision_tree_max_depth=5.eps
        LABTSH_ada_boost_n_estimators=50,_learning_rate=1.eps
        LABFER_logistic_regression_c=0.1.eps
        LABFER_ada_boost_n_estimators=100,_learning_rate=0.01.eps
        LABNTBNP_random_forest_n_estimators=5,_max_depth=15.eps
        LABFER_logistic_regression_c=1.0.eps
        LABSPLAC_ada_boost_n_estimators=50,_learning_rate=0.01.eps
        LABNTBNP_random_forest_n_estimators=30,_max_depth=5.eps
        LABSPLAC_ada_boost_n_estimators=10,_learning_rate=1.eps
        LABFER_random_forest_n_estimators=30,_max_depth=10.eps
        LABTSH_decision_tree_max_depth=20.eps
        LABFER_ada_boost_n_estimators=50,_learning_rate=0.1.eps
        LABTSH_ada_boost_n_estimators=50,_learning_rate=0.01.eps
        LABSPLAC_ada_boost_n_estimators=10,_learning_rate=0.1.eps
        LABTSH_random_forest_n_estimators=5,_max_depth=10.eps
        LABFER_decision_tree_max_depth=5.eps
        LABTSH_ada_boost_n_estimators=100,_learning_rate=0.1.eps
        LABTSH_ada_boost_n_estimators=100,_learning_rate=1.eps
        LABTSH_ada_boost_n_estimators=50,_learning_rate=0.1.eps
        LABTSH_decision_tree_max_depth=50.eps
        LABNTBNP_decision_tree_max_depth=5.eps
        logistic_regression.eps
        LABNTBNP_logistic_regression_c=1.0.eps
        LABSPLAC_ada_boost_n_estimators=100,_learning_rate=0.1.eps
        LABFER_gaussian_naive_bayes.eps
        LABSPLAC_ada_boost_n_estimators=50,_learning_rate=0.1.eps
        LABFER_random_forest_n_estimators=5,_max_depth=5.eps
        LABFER_ada_boost_n_estimators=10,_learning_rate=0.1.eps
        LABNTBNP_decision_tree_max_depth=50.eps
        LABFER_logistic_regression_c=10.0.eps
        LABFER_ada_boost_n_estimators=10,_learning_rate=0.01.eps
        LabNormalityPredictionPipeline.py
        extra
        LABFER.txt
        cols_labsplac.csv
        LABSPLAC.txt
        cols_labtsh.csv
        cols_labntbnp.csv
        cols_labfer.csv
        LABTSH.txt
        prepareData.py
        LabChangePredictionPipeline.py
        __init__.py
        LabComponentMap.tab
        try_Keras.py
        processData.py
        LabChangeBatchDriver.py
        LabChangePerformanceReport.Rmd
      - report
        LNPP-repro-steps.md
        __init__.py
        JAMAopenNetwork.py
        data_inspector.py
        LabNormalityReport.py
      - images
      - multisite_mapping
        map_vitals.tab
        map_component.tab
        map_standalone.tab
        map_team.tab
        map_cormobidity.tab
      - test
        TestDataCreator.py
        __init__.py
        TestLabNormalityMatrix_UMich.py
      - lab_statistics
        JAMANetworkOpen
        JAMA_config.py
        Figure 3.ipynb
        Appendix Tables.ipynb
        Appendix Figures.ipynb
        statistical_analysis.ipynb
        Table 1.ipynb
        Figure 2.ipynb
        Figure 1.ipynb
        customDBUtil.py
        stats_database.py
        data_repeat_component_descriptive
        global_stats.csv
        global_stats_null_filled.csv
        result_ids.csv
        base_names.csv
        clinical_data.py
        summary_stats.py
        repeat_lab_descriptive.py
        __init__.py
        data_repeat_lab_descriptive
        proc_codes.csv
        stats_utils.py
        data_summary_stats
        components.csv
        UMich_component.csv
        potential_matches.csv
        component_cnts.txt
        labs_charges_volumes.csv
        UMich_panel.csv
        UCSF.csv
        labs.csv
        CLAB2018v1.csv
        test_repeat_component_descriptive.py
        test_summary_stats.py
        repeat_component_descriptive.py
        data_consistency
        report
      - __init__.py
    - CDSS
      - testGenSim.py
      - insertData.bat
      - patchBPFlowsheet.py
      - insertData.sh
      - setupDataPreprocessingNotes.txt
      - addSynonyms.sql
      - prepareItems.bat
      - setupNotes.txt
      - assocAnalysis.py
      - populateOrderResultStats.py
    - DecayingWindows
      - CreatePatientList.py
      - addConstraints.sql
      - AAOneMonth.py
      - StitchIDFiles.py
      - commitFromFile.py
      - dropConstraints.sql
      - __init__.py
      - AALoopMonth.py
    - ClinicalPredictionRules
      - ConditionMortalityMatrix.py
      - ConditionMortalityPredictor.py
    - ResidentLogs
      - stopWords.txt
      - pagerWordCounts.py
    - TopicModelsVsOrderSets
      - step50.orderSetUsage.py
      - testGenSim.py
      - step170.collateResults.py
      - step110.prepareOrderSetItems.py
      - step185.formatResults.TimeMatched.py
      - patchAdHocOrderSet.py
      - step10.preparePatientItems.py
      - stepXX.quickTest.py
      - step140.orderSetAssociationAnalysis.py
      - step180.formatResults.py
      - step40.associationAnalysis.py
      - step130.orderSetTopicModelAnalysis.py
      - step20.buildTopicModel.py
      - step30.topicModelAnalysis.py
      - orderSetsUsed.py
      - step150.orderSetUsage.py
    - DeepLearningRecommender
      - rnn_data_wrapper.py
      - human_authored_baseline.ipynb
      - load_and_format_data.py
      - FeatureMatrix.py
      - create_batches.py
      - feature_cols.txt
      - ClinicLSTM.ipynb
      - FeatureMatrixFactory.py
      - response_variables.csv
      - ReadMe.md
      - ClinicNet.ipynb
      - makeMatrix.py
  - GlucoseControl
    - 1filterPatient.py
    - README.md
    - queries.sql
    - 2constructFeatures.py
    - 3regression.py
    - 4classification.py
  - BMT_hyperglycemia
    - BGs_BMT_pts.sql
    - recieved_insulin_BMTpts
    - sql_query_R.R
    - BMT_primary_TT.sql
    - steroids_IV_PO_query.sql
    - BMT_steroid_orders.sql
    - insulin_orders_BMT.sql
    - BMTpts_given_steroids.sql
    - count_pt_enc_BMTprimary.sql
    - ADT_BMT_pts.sql
    - pts_on_ISS.sql
    - BMT_and_DM_distinct.sql
    - TT_ADT_join_BMTmatching.sql
    - .Rhistory
    - BMT_age.sql
    - BMT_med_freq.sql
    - readme.md
    - BMT_dx_pts.sql
    - recieved_insulin_BMTpts.sql
    - inpatient_BG_labs.sql
    - BMTpts_ordered_for_insulin.sql
    - BMT_A1c.sql
    - BMT_gender.sql
  - OutpatientReferral
    - mapping
    - data_config.py
    - sampleQueries.sql
    - __init__.py
    - queries.py
    - notes.md
    - data_utils.py
  - Covid19 CDC
    - long_tbl_grp.ipynb
    - notebook.tex
    - prevalenceQueries.sql
    - new_tbl_adj.ipynb
    - long_tbl.ipynb
    - coinfectionQuery.sql
    - long_tbl_grp_person.ipynb
    - cdc_tbl1.ipynb
    - git_export.sh
  - __init__.py
  - BloodCulturePrediction
    - LabCultureMatrix.py
  - DiabeticsAtStanford
    - blood_glucose_partialquery.sql
    - readme
    - pts_on_ISS.sql
    - pullBMTteams.sql
  - InpatientDiabetes
    - Inpatientdiabetesreadme.md
    - flowsheetglucoses.sql
    - Inpatient diabetes cohort
    - TDD_subqueries.sql
    - TDD_insulin.sql
    - inpt_subQ_insulin.sql
    - inpt_weight.sql
    - inpt_glucose_labs.sql
  - AuditLog
    - stroke_cohort.sql
    - sql_scripts
      - ct_head.sql
    - MooreMeeting
      - July10.md
    - general_inpatient_population_diagnosis.sql
    - stroke_cohort_summary.R
    - mlSplitScript.py
    - pipeline.R
    - strokeOrder.R
    - readme.md
    - ct_head.R
- medinfo
  - ml
    - Const.py
    - SupervisedLearner_Class.py
    - BifurcatedSupervisedClassifier.py
    - FeatureSelector.py
    - SupervisedLearningPipeline.py
    - ClassifierAnalyzer.py
    - test
      - FeatureSelectorTestData.py
      - TestClassifierAnalyzer.py
      - TestBifurcatedSupervisedClassifier.py
      - RegressorAnalyzerTestData.py
      - TestSupervisedClassifier.py
      - PredictorAnalyzerTestData.py
      - TestSupervisedLearner.py
      - TestRegressorAnalyzer.py
      - __init__.py
      - expected-linear-predictor.report
      - SupervisedLearningTestData.py
      - TestFeatureSelector.py
      - expected-list-predictor.report
      - TestRegressor.py
      - SupervisedLearnerTestData.py
      - ClassifierAnalyzerTestData.py
      - TestPredictorAnalyzer.py
    - SupervisedLearner_Utils.py
    - Predictor.py
    - PredictorAnalyzer.py
    - __init__.py
    - SupervisedLearner.py
    - ListPredictor.py
    - RegressorAnalyzer.py
    - ClassifierPipeline.py
    - Regressor.py
    - SupervisedLearner_System.py
    - SupervisedClassifier.py
  - sequenceanalysis
    - SequenceAnalyzer.py
    - test
      - TestSequenceAnalyzer.py
      - TestCountRepeatNormals.py
      - __init__.py
    - __init__.py
    - CountRepeatNormals.py
  - web
    - resource
      - ajax-loader.gif
      - dateHelper.js
      - calendar.gif
      - toolkit.js
      - stylesBasic.css
      - stylesGrey.css
      - calendarPopup.htm
      - cancelIcon.svg
      - requestParam.js
      - magnify.gif
      - errorReport.eml
      - stylesBlue.css
    - cgibin
      - Const.py
      - errorTemplate.py
      - admin
        DBUtilWeb.psp
        DebugWeb.py
        BaseAdminWeb.py
        DBUtilWeb.py
        __init__.py
        DBUtilWeb.htm
      - Options.py
      - cgi2psp.py
      - Links.py
      - BaseWeb.py
      - __init__.py
      - cpoe
        BaseCPOEWeb.py
        PatientCareFrame.htm
        PatientCareFrame.py
        PatientCareFrame.js
        SimSetup.htm
        ItemRecommenderWeb.py
        Track.js
        SimSetup.py
        ItemRecommenderWeb.htm
        __init__.py
        dynamicdata
        ResultsReview.htm
        NotesReview.py
        ItemRecommendationTable.htm
        NotesReview.htm
        RelatedOrders.py
        ClinicalItemData.py
        NewOrders.htm
        RelatedOrders.htm
        ActiveOrders.py
        NewOrders.py
        OrderSetSearch.htm
        ActiveOrders.htm
        ResultsReview.py
        __init__.py
        OrderSetSearch.py
        ClinicalItemData.htm
        BaseDynamicData.py
        ItemRecommendationTable.py
      - Env.py
      - errorTemplate.htm
      - Util.py
    - __init__.py
    - tools
      - AcidBaseAnalysis.htm
      - medTemplates.htm
      - lib
        toolkit.js
        Color.js
      - Hyponatremia.htm
      - kiosk
        sleep.bat
        wake.bat
        logoControl.htm
        feedFrame.htm
        index.htm
      - formatTemplates.py
      - OpioidEquivalentDosing.htm
      - Antibiogram
        testOut.tsv
        antibiogramControls.js
        antibiogramStyles.css
        VAMC Wilkes Barre - Antibiogram FY17.xlsx
        AntibiogramTable.htm
        test.tsv
        notes.xlsx
        melt.py
    - index.htm
  - db
    - Const.py
    - DBUtil.py
    - bigquery
      - test
        __init__.py
        test_bigQueryUtil.py
      - bigQueryUtil.py
      - __init__.py
    - test
      - TestResultsFormatter.py
      - Const.py
      - TestDBUtil.py
      - __init__.py
      - Util.py
    - support
      - deleteCategoryItems.py
      - restoreAccessTables.sh
      - restoreAccessTables.bat
      - commonQueries.sql
      - restoreCPOESimulationTables.bat
      - dumpCPOETables.bat
      - dumpAccessTables.bat
      - dumpCPOESimulationTables.bat
      - dumpOpioidPCPTables.bat
      - dumpCPOETables.sh
      - dumpAccessTables.sh
      - restoreCPOETables.sh
      - syncDatabaseTable.py
    - ResultsFormatter.py
    - __init__.py
    - definition
      - cpoeStats.sql
      - opiodrx.sql
      - cpoeSimulation.sql
    - Model.py
    - Env.py
    - Util.py
  - geography
    - Const.py
    - __init__.py
    - ChloroplethGenerator.py
    - ZIPCodeMapping.txt
  - common
    - Const.py
    - test
      - Const.py
      - TestStatsUtil.py
      - __init__.py
      - Util.py
    - IteratorFactory.py
    - support
      - clean.py
      - awaitProcess.py
      - sleeper.py
    - __init__.py
    - ProcessManager.py
    - Env.py
    - StatsUtil.py
    - Util.py
  - textanalysis
    - Const.py
    - test
      - Const.py
      - TestAVSParse.py
      - __init__.py
      - Util.py
    - BaseTextAnalysis.py
    - textAnalysis.js
    - stylesBasic.css
    - stylesGrey.css
    - AVSParse.py
    - __init__.py
    - Env.py
    - Util.py
  - analysis
    - CalibrationHistogram.py
    - Const.py
    - CalibrationPlot.py
    - AccuracyPerTopItems.py
    - RankSimilarity.py
    - test
      - Const.py
      - TestROCPlot.py
      - TestCalibrationPlot.py
      - TestSQLQueryDataFile.py
      - TestAccuracyPerTopItems.py
      - TestMergeJoinDataFiles.py
      - TestConcatenateDataFiles.py
      - __init__.py
      - TestBatchTTests.py
      - Util.py
    - ConcatenateDataFiles.py
    - BaseAnalysis.py
    - SQLQueryDataFile.py
    - MergeJoinDataFiles.py
    - __init__.py
    - RegressionAnalysis.py
    - BatchTTests.py
    - ROCPlot.py
    - CombineScoreOutcomeFiles.py
    - Env.py
    - Util.py
  - __init__.py
  - cpoe
    - Const.py
    - TopicModelRecommender.py
    - AssociationAnalysis.py
    - cpoeSim
      - simdata
        sim_note.dump.sql
        sim_state_result.dump.sql
        sim_state_transition.dump.sql
        Readme.md
        cpoeSim.sql
        restoreSimTables.sh
        revertSimTables.sh
        extraClinicalItems.sql
        sim_patient.dump.sql
        dumpSimTables.sh
        sim_patient_order.dump.sql
        sim_patient_state.dump.sql
        sim_order_result_map.dump.sql
        sim_user.dump.sql
        createClinicalItemTable.py
        sim_state.dump.sql
        sim_note_type.dump.sql
      - Const.py
      - test
        Const.py
        TestSimManagerGrading.py
        TestSimManager.py
        __init__.py
        Util.py
        TestMakeUsageReport.py
      - cpoeSim.md
      - analysis
        sampleQueries.sql
        CPOETrackerAnalysis.py
        __init__.py
        CPOETrackerAnalysis-walkthrough.ipynb
        make_usage_report.py
      - __init__.py
      - SimManager.py
    - test
      - Const.py
      - TestDecayingWindows.py
      - TestOrderSetRecommender.py
      - TestItemRecommender.py
      - TestTopicModel.py
      - TestDataManager.py
      - __init__.py
      - TestTripleAssociationAnalysis.py
      - TestAssociationAnalysis.py
      - Util.py
    - ItemRecommender.py
    - DecayingWindows.py
    - TripleAssociationAnalysis.py
    - analysis
      - Const.py
      - RecommendationClassificationAnalysis.py
      - BaseCPOEAnalysis.py
      - RecommendationRankingTrendAnalysis.py
      - PreparePatientItems.py
      - OrderSetUsageAnalysis.py
      - test
        Const.py
        TestRecommendationClassificationAnalysis.py
        TestOrderSetRecommendationClassificationAnalysis.py
        TestOrderSetUsageAnalysis.py
        TestRecommendationRankingTrendAnalysis.py
        __init__.py
        TestPreparePatientItems.py
        TestOutcomePredictionAnalysis.py
        TestTopicModelAnalysis.py
        Util.py
      - TopicModelAnalysis.py
      - OutcomePredictionAnalysis.py
      - associationQuery.sql
      - __init__.py
      - OrderSetRecommenderClassificationAnalysis.py
      - Env.py
      - Util.py
    - __init__.py
    - TopicModel.py
    - OrderSetRecommender.py
    - DataManager.py
    - Env.py
    - Util.py
  - dataconversion
    - FeatureMatrixIO.py
    - STRIDEDxListConversion.py
    - Const.py
    - starr_conv
      - STARRDemographicsConversion.py
      - STARROrderMedConversion.py
      - test
        TestSTARROrderProcConversion.py
        TestSTARRTreatmentTeamConversion.py
        __init__.py
        TestSTARRDemographicsConversion.py
        TestSTARROrderMedConversion.py
      - STARRUtil.py
      - STARRTreatmentTeamConversion.py
      - __init__.py
      - STARROrderProcConversion.py
    - STRIDEDemographicsConversion.py
    - STRIDEOrderMedConversion.py
    - FeatureMatrix.py
    - test
      - Const.py
      - test-matrix-no-header.tab
      - TestDataExtractor.py
      - TestFeatureMatrixTransform.py
      - TestProviderRotationConversion.py
      - TestSTRIDEOrderResultsConversion.py
      - TestFeatureMatrixIO.py
      - TestSTRIDEOrderProcConversion.py
      - TestSTRIDECultureMicroConversion.py
      - TestResidentScheduleFormat.py
      - TestSTRIDEDemographicsConversion.py
      - FeatureMatrixTestData.py
      - TestSTRIDEAccessLogConversion.py
      - TestSTRIDEOrderMedConversion.py
      - __init__.py
      - FMTransformTestData.py
      - UMichFeatureMatrixTestData.py
      - TestSTRIDEDxListConversion.py
      - TestSTRIDETreatmentTeamConversion.py
      - TestFeatureMatrixFactory.py
      - EventDigraphTestData.py
      - FeatureMatrixIOTestData.py
      - Util.py
      - test-matrix-with-header.tab
    - support
      - extractExcelSheets.py
      - patchHeader.py
      - __init__.py
    - STRIDECultureMicroConversion.py
    - STRIDETreatmentTeamConversion.py
    - STRIDEOrderResultsConversion.py
    - FeatureMatrixFactory.py
    - STRIDEAccessLogConversion.py
    - __init__.py
    - DataExtractor.py
    - ProviderRotationConversion.py
    - STRIDEOrderProcConversion.py
    - Env.py
    - ResidentScheduleFormat.py
    - Util.py
    - FeatureMatrixTransform.py
    - mapdata
      - POAntibiotic.Names.tab
      - ClinicalItems.IVAntibiotics.tab
      - Medication.IVFluids.tab
      - CharlsonComorbidity-ICD10.tab
      - TreatmentTeamGroups.tab
      - ClinicalItems.POAntibiotics.tab
      - IVAntibiotics.Names.tab
      - CharlsonComorbidity-ICD9CM.tab
      - TreatmentTeamGroups_UCSF.tab
- .gitignore

#!/usr/bin/env python
import sys, os
import hashlib
import math
import tempfile
import time
import logging

from datetime import datetime
from optparse import OptionParser
from medinfo.common.Util import stdOpen, ProgressDots
from medinfo.db import DBUtil
from medinfo.db.Model import SQLQuery
from medinfo.db.Model import RowItemModel, modelListFromTable, modelDictFromList, RowItemFieldComparator

from medinfo.dataconversion.Util import log
from medinfo.dataconversion.Const import TEMPLATE_MEDICATION_ID, TEMPLATE_MEDICATION_PREFIX
from medinfo.dataconversion.Const import COLLECTION_TYPE_ORDERSET
from medinfo.dataconversion.Env import DATE_FORMAT

from medinfo.db.bigquery import bigQueryUtil
from medinfo.dataconversion.starr_conv import STARRUtil

from google.cloud import bigquery

# BigQuery tables read by querySourceItems: SOURCE_TABLE is selected from (aliased "med"),
#   ORDERSET_TABLE is left-outer-joined onto it (aliased "os") by order_med_id_coded
SOURCE_TABLE = "starr_datalake2018.order_med"
ORDERSET_TABLE = "starr_datalake2018.med_orderset"

# For this data source, item category will be a Medication subscripted by two values:
#   categoryFromSourceItem fills the placeholders with med_route and ordering_mode, in that order
CATEGORY_TEMPLATE = "Med ({}) ({})"
# Template for generic medication code reference if detailed RXCUI values not available
GENERIC_CODE_TEMPLATE = "MED{}"
# Template for medication code references when detailed RXCUI values available
RXCUI_CODE_TEMPLATE = "RXCUI{}"


class STARROrderMedConversion:
    """Data conversion module to take STARR provided computerized physician order entry data
    (medications specifically)
    into the structured data analysis tables to facilitate subsequent analysis.

    For combination medications (usually same medication but with "1.5x" dosing like
    Metoprolol 75mg ordered as combination of 50mg + 25mg tabs), just record as the
    first component in the mixture.

    Ignore PRN orders for now to simplify data set and focus on standing orders.
    """

    def __init__(self):
        """Open the BigQuery / database handles and start with empty lookup caches."""
        # Remote handles: a BigQuery connection plus the client wrapper used to run queries
        self.bqConn = bigQueryUtil.connection()
        self.bqClient = bigQueryUtil.BigQueryClient()
        # Default connection source, but allow specification of an alternative DB connection source
        self.connFactory = DBUtil.ConnectionFactory()

        # Local caches mirroring rows already found / inserted during this run
        self.categoryBySourceDescr = {}                     # (source table, description) -> clinical_item_category row
        self.clinicalItemByCategoryIdCode = {}              # (category ID, code) -> clinical_item row
        self.itemCollectionByKeyStr = {}                    # "protocol-section-subgroup" key string -> item_collection row
        self.itemCollectionItemByCollectionIdItemId = {}    # (collection ID, clinical item ID) -> item_collection_item row

    def convertAndUpload(self, convOptions, tempDir=tempfile.gettempdir(), removeCsvs=True, target_dataset_id='clinical_item2018'):
        """
        Wrapper around primary run function: does the conversion locally, then uploads the results to BQ.

        convOptions - Conversion options, passed straight through to convertSourceItems.
        tempDir - Directory handed to the STARRUtil dump / upload / remove helpers for the intermediate CSV files.
            Note the default is evaluated once, when the method is defined.
        removeCsvs - If true, the CSV files are removed again after each upload.
        target_dataset_id - BigQuery dataset ID handed to the upload helpers.

        Everything is processed as one batch (a single fixed batchCounter value).
        NOTE(review): the previous docstring justified this with "converted table is small" for
        treatment team, which looks copied from another converter - confirm it holds for order_med.
        """
        starrUtil = STARRUtil.StarrCommonUtils(self.bqClient)
        self.convertSourceItems(convOptions)

        batchCounter = 99999    # TODO (nodir) why not 0?
        self.bqClient.reconnect_client()  # refresh bq client connection

        # Each group below follows the same steps: dump to CSV, upload to BQ, optionally delete the CSV,
        #   then call the matching remove...AddedLines helper for SOURCE_TABLE
        #   (presumably clears the locally converted rows - verify against STARRUtil)

        # 1. patient_item_collection_link
        starrUtil.dumpPatientItemCollectionLinkToCsv(tempDir, batchCounter)
        starrUtil.uploadPatientItemCollectionLinkCsvToBQ(tempDir, target_dataset_id, batchCounter)
        if removeCsvs:
            starrUtil.removePatientItemCollectionLinkCsv(tempDir, batchCounter)
        starrUtil.removePatientItemCollectionLinkAddedLines(SOURCE_TABLE)

        # 2. item collection tables (no batch counter involved)
        # For now keep the clinical_* tables, upload them once all tables have been converted
        starrUtil.dumpItemCollectionTablesToCsv(tempDir)
        starrUtil.uploadItemCollectionTablesCsvToBQ(tempDir, target_dataset_id)
        if removeCsvs:
            starrUtil.removeItemCollectionTablesCsv(tempDir)
        starrUtil.removeItemCollectionTablesAddedLines(SOURCE_TABLE)

        # 3. patient_item
        starrUtil.dumpPatientItemToCsv(tempDir, batchCounter)
        starrUtil.uploadPatientItemCsvToBQ(tempDir, target_dataset_id, batchCounter)
        if removeCsvs:
            starrUtil.removePatientItemCsv(tempDir, batchCounter)
        starrUtil.removePatientItemAddedLines(SOURCE_TABLE)

        # 4. clinical tables (no batch counter involved)
        # For now keep the clinical_* tables, upload them once all tables have been converted
        starrUtil.dumpClinicalTablesToCsv(tempDir)
        starrUtil.uploadClinicalTablesCsvToBQ(tempDir, target_dataset_id)
        if removeCsvs:
            starrUtil.removeClinicalTablesCsv(tempDir)
        starrUtil.removeClinicalTablesAddedLines(SOURCE_TABLE)

    def convertSourceItems(self, convOptions):
        """Primary run function: walk the order_med source rows and convert each one into
        equivalent patient_item, clinical_item, and clinical_item_category entries.
        Should look for redundancies after the fact to catch repeated conversions.

        convOptions.startDate - If provided, only items whose order_time_jittered is on or after that date.
        convOptions.endDate - If provided, only items whose order_time_jittered is before that date.
        """
        log.info("Conversion for items dated {} to {}".format(convOptions.startDate, convOptions.endDate))
        tracker = ProgressDots()
        dbConn = self.connFactory.connection()
        try:
            # Medication mapping table is loaded up front so each row can be normalized from memory
            medMapping = self.loadRXCUIData()

            # One shared connection is reused for every converted item
            normalizedItems = self.querySourceItems(medMapping, convOptions, progress=tracker, conn=dbConn)
            for normalizedItem in normalizedItems:
                self.convertSourceItem(normalizedItem, conn=dbConn)
                tracker.Update()
        finally:
            dbConn.close()
        tracker.PrintStatus()

    def loadRXCUIData(self):
        """Load up the full contents of the stride_mapped_meds table into
        memory (only a few thousand records) to facilitate rapid lookup resolution
        of common medication ingredient data.
        """
        rxcuiDataByMedId = dict()

        query = \
            """select medication_id, rxcui, active_ingredient
            from starr_datalake2018.mapped_meds
            """

        query_job = self.bqClient.queryBQ(query, verbose=True)

        for row in query_job:  # API request - fetches results
            (medId, rxcui, ingredient) = row    # Unpack the data tuple
            if medId not in rxcuiDataByMedId:
                rxcuiDataByMedId[medId] = dict()
            rxcuiDataByMedId[medId][rxcui] = ingredient

        return rxcuiDataByMedId

    def querySourceItems(self, rxcuiDataByMedId, convOptions, progress=None, conn=None):
        """Query BigQuery for all source medication orders and yield the normalized results
        one at a time.  If convOptions.startDate is provided, only return items whose
        order_time_jittered is on or after that date; if convOptions.endDate is provided,
        only items before that date.

        progress and conn are accepted but not used by the current BigQuery based implementation.
        """
        # Columns to select; the ones qualified with "med." are read from the order_med side of the join
        selectColumns = ["med.order_med_id_coded", "jc_uid", "med.pat_enc_csn_id_coded", "med.medication_id",
                         "med.med_description", "order_time_jittered", "order_time_jittered_utc", "med_route",
                         "number_of_times", "protocol_id", "protocol_name", "ss_section_id", "ss_section_name",
                         "ss_sg_key", "ss_sg_name", "ordering_mode"]
        # Row model keys are the same columns with the table alias stripped off
        resultKeys = [column.split(".")[-1] for column in selectColumns]

        # TODO the original version built this through an SQLQuery object - need to figure out
        #   how to pass date parameters to a BQ query using SQLQuery
        sqlParts = [
            # Grab order set links if they exist
            "SELECT {} FROM {} as med left outer join {} as os on med.order_med_id_coded = os.order_med_id_coded".format(', '.join(selectColumns), SOURCE_TABLE, ORDERSET_TABLE),
            # TODO only 20 records with medication_id = TEMPLATE_MEDICATION_ID (whereas stride has 67041 such rows)
            " where med.medication_id <> {}".format(TEMPLATE_MEDICATION_ID),
            # Ignore PRN orders
            " and (freq_name is NULL or freq_name not like '%PRN')",
        ]
        if convOptions.startDate is not None:
            sqlParts.append(" and order_time_jittered >= @startDate")
        if convOptions.endDate is not None:
            sqlParts.append(" and order_time_jittered < @endDate")
        sqlParts.append(" order by order_time_jittered, med.order_med_id_coded, jc_uid, med.pat_enc_csn_id_coded, med.medication_id")
        sqlParts.append(';')
        sql = "".join(sqlParts)

        # Both date parameters are always bound, even when the matching filter clause was left out
        boundParams = [
            bigquery.ScalarQueryParameter(paramName, 'DATETIME', paramValue)
            for (paramName, paramValue) in (('startDate', convOptions.startDate), ('endDate', convOptions.endDate))
        ]

        # TODO Query to get an estimate of how long the process will be (would set progress.total)

        resultRows = self.bqClient.queryBQ(sql, query_params=boundParams, verbose=True)

        for resultRow in resultRows:  # API request - fetches results
            sourceModel = RowItemModel(list(resultRow.values()), resultKeys)
            log.debug("rowModel: {}".format(sourceModel))
            # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            for normalizedModel in self.normalizeMedData(rxcuiDataByMedId, sourceModel, convOptions):
                yield normalizedModel

    def normalizeMedData(self, rxcuiDataByMedId, rowModel, convOptions):
        """Normalize medication data by active ingredient mixtures and number of doses"""
        for rowModel in self.normalizeMedIngredients(rxcuiDataByMedId, rowModel, convOptions):
            if convOptions.doseCountLimit is not None and rowModel["number_of_times"] is not None:
                if rowModel["number_of_times"] < convOptions.doseCountLimit:
                    rowModel["code"] += " (<{})".format(convOptions.doseCountLimit)
                    rowModel["med_description"] += " (<{} doses)".format(convOptions.doseCountLimit)
            yield rowModel

    @staticmethod
    def normalizeMedIngredients(rxcuiDataByMedId, rowModel, convOptions):
        """Given a rowModel of medication data, normalize it further.
        Specifically, look for common active ingredients to simplify the data.
        If the medication is actually a compound of multiple active ingredients,
        then break out into active ingredients.

        rxcuiDataByMedId - Dictionary of medication_id -> {rxcui: active ingredient name or None},
            as built by loadRXCUIData.
        rowModel - Source row; medication_id, med_route and med_description are read from it.
        convOptions - Options object; normalizeMixtures and includeRouteInDescription are consulted.

        Yields row models that each carry a "code" entry:
        - No mapping for the medication_id: the given row, with a generic medication code.
        - A single mapped ingredient, or normalizeMixtures set: one independent copy of the row per
          mapped RxCUI.  Copies with a known ingredient are re-keyed to the RxCUI and described by the
          ingredient name; copies without one keep the source data under the generic medication code.
        - Multiple ingredients and normalizeMixtures not set: the given row, under the generic
          medication code, described by a composite of the known ingredient names.
        """
        medId = rowModel["medication_id"]
        genericCode = GENERIC_CODE_TEMPLATE.format(medId)

        if medId not in rxcuiDataByMedId:
            # No mapping entry found, just use the available generic medication data then
            rowModel["code"] = genericCode
            yield rowModel

        else:
            # Mapping entry found, yield a normalized model for each active ingredient found
            #   (will usually be a 1-to-1 relation, but sometimes multiple)
            ingredientByRxcui = rxcuiDataByMedId[medId]
            if len(ingredientByRxcui) <= 1 or convOptions.normalizeMixtures:
                # Single ingredient or want component active ingredients to each have one record
                for (rxcui, ingredient) in ingredientByRxcui.items():
                    # Always hand out a fresh copy.  Consumers such as normalizeMedData edit the yielded
                    #   row in place, so yielding the shared rowModel once per ingredient-less RxCUI
                    #   would let those edits (e.g. the dose count suffix) pile up on repeated yields.
                    normalizedModel = RowItemModel(rowModel)
                    if ingredient is None:
                        # ~250/15000 RxCUI's don't have a defined active ingredient.
                        #   Fall back to the available generic medication data then
                        normalizedModel["code"] = genericCode
                    else:
                        normalizedModel["medication_id"] = rxcui
                        normalizedModel["code"] = RXCUI_CODE_TEMPLATE.format(rxcui)
                        normalizedModel["med_description"] = ingredient.title()
                        if convOptions.includeRouteInDescription:
                            normalizedModel["med_description"] += " {}".format(normalizedModel["med_route"])
                    yield normalizedModel
            else:
                # Mixture of multiple ingredients and want to keep denormalized.
                # Extract out the active ingredient names to make a composite based only on that unique
                #   combination.  RxCUI's without a defined active ingredient are left out; sorting the
                #   (ingredient, rxcui) pairs ensures a consistent order.
                knownIngredients = sorted(
                    (ingredient, rxcui)
                    for (rxcui, ingredient) in ingredientByRxcui.items()
                    if ingredient is not None
                )
                ingredientNames = [ingredient.title() for (ingredient, rxcui) in knownIngredients]

                # Just stick to medication_id for the code instead of deriving a new composite identifier
                rowModel["code"] = genericCode
                if ingredientNames:
                    rowModel["med_description"] = str.join("-", ingredientNames)
                    if convOptions.includeRouteInDescription:
                        rowModel["med_description"] += " {}".format(rowModel["med_route"])
                # else: no ingredient names known at all, so keep the source description
                #   rather than replacing it with an empty composite
                yield rowModel

    def convertSourceItem(self, sourceItem, conn=None):
        """Given an individual sourceItem record, produce / convert it into an equivalent
        item record in the analysis database.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()
        try:
            # Normalize sourceItem data into hierarchical components (category -> clinical_item -> patient_item).
            #   Relatively small / finite number of categories and clinical_items, so these should only have to be instantiated
            #   in a first pass, with subsequent calls just yielding back in memory cached copies
            category = self.categoryFromSourceItem(sourceItem, conn=conn)
            clinicalItem = self.clinicalItemFromSourceItem(sourceItem, category, conn=conn)
            patientItem = self.patientItemFromSourceItem(sourceItem, clinicalItem, conn=conn)

            if sourceItem["protocol_id"] is not None:
                # Similarly build up item collection (order set) hierarchy and link
                itemCollection = self.itemCollectionFromSourceItem(sourceItem, conn=conn)
                itemCollectionItem = self.itemCollectionItemFromSourceItem(sourceItem, itemCollection, clinicalItem, conn=conn)
                patientItemCollectionLink = self.patientItemCollectionLinkFromSourceItem(sourceItem, itemCollectionItem, patientItem, conn=conn)

        finally:
            if not extConn:
                conn.close()

    def categoryFromSourceItem(self, sourceItem, conn):
        """Load or produce a clinical_item_category record model for the given sourceItem.
        In this case, always a Medication category, distinguished by route and ordering mode.
        """
        description = CATEGORY_TEMPLATE.format(sourceItem["med_route"], sourceItem["ordering_mode"])
        cacheKey = (SOURCE_TABLE, description)
        knownCategories = self.categoryBySourceDescr
        if cacheKey in knownCategories:
            return knownCategories[cacheKey]

        # Category does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
        newCategory = RowItemModel(
            {
                "source_table":  SOURCE_TABLE,
                "description":  description,
            }
        )
        (categoryId, _isNew) = DBUtil.findOrInsertItem("clinical_item_category", newCategory, conn=conn)
        newCategory["clinical_item_category_id"] = categoryId
        knownCategories[cacheKey] = newCategory
        return knownCategories[cacheKey]

    def clinicalItemFromSourceItem(self, sourceItem, category, conn):
        """Load or produce a clinical_item record model for the given sourceItem.

        Items are cached by (category ID, code).  On a cache hit, the stored description is
        replaced (and the database row updated) when the current one is shorter, or when the
        stored one starts with the generic template prefix.
        """
        itemKey = (category["clinical_item_category_id"], sourceItem["code"])
        knownItems = self.clinicalItemByCategoryIdCode

        if itemKey in knownItems:
            # Clinical Item does exist, but check for redundancies and opportunities to
            #   simplify different descriptions for the same medication
            knownItem = knownItems[itemKey]
            knownDescription = knownItem["description"]
            isShorter = len(sourceItem["med_description"]) < len(knownDescription)
            if isShorter or knownDescription.startswith(TEMPLATE_MEDICATION_PREFIX):
                # Prior medication recorded description either a generic template,
                #   or a longer version than necessary, that can be replaced with the current one
                knownItem["description"] = sourceItem["med_description"]
                DBUtil.updateRow("clinical_item", knownItem, knownItem["clinical_item_id"], conn=conn)
            return knownItems[itemKey]

        # Clinical Item does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
        newItem = RowItemModel(
            {
                "clinical_item_category_id": category["clinical_item_category_id"],
                "external_id": sourceItem["medication_id"],
                "name": sourceItem["code"],
                "description": sourceItem["med_description"],
            }
        )
        (clinicalItemId, _isNew) = DBUtil.findOrInsertItem("clinical_item", newItem, conn=conn)
        newItem["clinical_item_id"] = clinicalItemId
        knownItems[itemKey] = newItem
        return knownItems[itemKey]

    def patientItemFromSourceItem(self, sourceItem, clinicalItem, conn):
        """Produce a patient_item record model for the given sourceItem and persist it.

        Returns the record model, including the patient_item_id of the inserted
        (or, for a duplicate, the looked up) row.
        """
        orderId = sourceItem["order_med_id_coded"]
        # jc_uid is parsed as a hexadecimal number after skipping its first two characters
        patientId = int(sourceItem["jc_uid"][2:], 16)
        record = RowItemModel(
            {
                "external_id": orderId,
                "patient_id": patientId,
                "encounter_id": sourceItem["pat_enc_csn_id_coded"],
                "clinical_item_id": clinicalItem["clinical_item_id"],
                "item_date": sourceItem["order_time_jittered"],
                "item_date_utc": str(sourceItem["order_time_jittered_utc"]),    # without str(), the time is being converted in postgres
            }
        )
        columnNames = list(record.keys())
        columnValues = list(record.values())
        insertSql = DBUtil.buildInsertQuery("patient_item", columnNames)
        try:
            # Optimistic insert of a new unique item
            DBUtil.execute(insertSql, columnValues, conn=conn)
            identityRows = DBUtil.execute(DBUtil.identityQuery("patient_item"), conn=conn)
            record["patient_item_id"] = identityRows[0][0]
        except conn.IntegrityError as err:
            # If turns out to be a duplicate, okay, pull out existing ID and continue to insert whatever else is possible
            log.info(err)
            # Lookup just by the composite key components to avoid attempting duplicate insertion again
            compositeKey = {
                "patient_id":       record["patient_id"],
                "clinical_item_id": record["clinical_item_id"],
                "item_date":        record["item_date"],
            }
            (record["patient_item_id"], _isNew) = DBUtil.findOrInsertItem("patient_item", compositeKey, conn=conn)
        return record

    def itemCollectionFromSourceItem(self, sourceItem, conn):
        # Load or produce an item_collection record model for the given sourceItem
        if sourceItem["protocol_id"] is None:
            # No order set link to this item, so nothing to return
            return None

        key = {
            "protocol_id": sourceItem["protocol_id"],
            "ss_section_id": sourceItem["ss_section_id"],
            "ss_sg_key": sourceItem["ss_sg_key"].strip().upper() if sourceItem["ss_sg_key"] is not None else None
        }

        collection_key = "%(protocol_id)d-%(ss_section_id)s-%(ss_sg_key)s" % key
        if collection_key not in self.itemCollectionByKeyStr:
            # Collection does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
            collection = RowItemModel(
                {
                    "external_id": sourceItem["protocol_id"],
                    "name": sourceItem["protocol_name"],
                    "section": sourceItem["ss_section_name"],
                    "subgroup": sourceItem["ss_sg_name"],
                }
            )
            (collectionId, isNew) = DBUtil.findOrInsertItem("item_collection", collection, conn=conn)
            collection["item_collection_id"] = collectionId
            self.itemCollectionByKeyStr[collection_key] = collection
        return self.itemCollectionByKeyStr[collection_key]

    def itemCollectionItemFromSourceItem(self, sourceItem, itemCollection, clinicalItem, conn):
        # Load or produce an item_collection_item record model for the given sourceItem
        itemKey = (itemCollection["item_collection_id"], clinicalItem["clinical_item_id"])
        if itemKey not in self.itemCollectionItemByCollectionIdItemId:
            # Item Collection Item does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
            collectionItem = RowItemModel(
                {
                    "item_collection_id": itemCollection["item_collection_id"],
                    "clinical_item_id": clinicalItem["clinical_item_id"],
                    "collection_type_id": COLLECTION_TYPE_ORDERSET,
                }
            )
            (collectionItemId, isNew) = DBUtil.findOrInsertItem("item_collection_item", collectionItem, conn=conn)
            collectionItem["item_collection_item_id"] = collectionItemId
            self.itemCollectionItemByCollectionIdItemId[itemKey] = collectionItem
        return self.itemCollectionItemByCollectionIdItemId[itemKey]

    def patientItemCollectionLinkFromSourceItem(self, sourceItem, collectionItem, patientItem, conn):
        """Produce and insert a patient_item_collection_link record for the given sourceItem,
        tying the patient item to the item collection item.  Nothing is returned.
        """
        linkRecord = RowItemModel(
            {
                "patient_item_id": patientItem["patient_item_id"],
                "item_collection_item_id": collectionItem["item_collection_item_id"],
            }
        )
        columnNames = list(linkRecord.keys())
        columnValues = list(linkRecord.values())
        insertSql = DBUtil.buildInsertQuery("patient_item_collection_link", columnNames)
        try:
            # Optimistic insert of a new unique item
            DBUtil.execute(insertSql, columnValues, conn=conn)
        except conn.IntegrityError as duplicateErr:
            # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
            log.info(duplicateErr)

    def main(self, argv):
        """Main method, callable from command line.

        argv - Full argument list including the program name; everything after the
            first element is parsed as options.
        """
        # NOTE(review): with the level at FATAL the info messages logged below are filtered out - confirm intended
        log.setLevel(logging.FATAL)

        parser = OptionParser(usage="usage: %prog [options]\n")
        parser.add_option(
            "-s", "--startDate", dest="startDate", metavar="<startDate>",
            help="Date string (e.g., 2011-12-15), if provided, will only run conversion on items with ordering time on or after this date."
        )
        parser.add_option(
            "-e", "--endDate", dest="endDate", metavar="<endDate>",
            help="Date string (e.g., 2011-12-15), if provided, will only run conversion on items with ordering time before this date."
        )
        parser.add_option(
            "-n", "--normalizeMixtures", dest="normalizeMixtures", action="store_true",
            help="If set, when find medication mixtures, will unravel / normalize into separate entries, one for each ingredient"
        )
        parser.add_option(
            "-d", "--doseCountLimit", dest="doseCountLimit",
            help="Medication orders with a finite number of doses specified less than this limit will be labeled as different items than those without a number specified, or whose number is >= to this limit. Intended to distinguish things like IV single bolus / use vs. continuous infusions and standing medication orders"
        )
        (options, _positional) = parser.parse_args(argv[1:])

        log.info("Starting: " + " ".join(argv))
        startedAt = time.time()

        # Translate the raw parser output into the options struct the converter works with
        conv_options = ConversionOptions()
        conv_options.extract_parser_options(options)

        self.convertAndUpload(conv_options)

        elapsed = time.time() - startedAt
        log.info("%.3f seconds to complete", elapsed)


class ConversionOptions:
    """Simple struct to contain multiple program options"""
    def __init__(self):
        self.startDate = None       # Only convert items ordered on or after this datetime (None = no lower bound)
        self.endDate = None         # Only convert items ordered before this datetime (None = no upper bound)
        self.normalizeMixtures = False  # If set, medication mixtures are split into one entry per ingredient
        self.doseCountLimit = None  # Orders with a known dose count below this limit get labeled as separate items
        self.includeRouteInDescription = True   # Append the medication route to normalized descriptions

    def extract_parser_options(self, options):
        """Copy the command line options onto this struct, converting them to their working types.

        options - Parsed options object with startDate, endDate and doseCountLimit attributes
            (strings or None), and optionally a normalizeMixtures flag.

        Options left as None keep their current value on this struct.
        Raises ValueError if a provided date or dose count limit cannot be parsed.
        """
        if options.startDate is not None:
            # Parse out the start date parameter, keeping only the year, month and day
            time_tuple = time.strptime(options.startDate, DATE_FORMAT)
            self.startDate = datetime(*time_tuple[0:3])

        if options.endDate is not None:
            # Parse out the end date parameter, keeping only the year, month and day
            time_tuple = time.strptime(options.endDate, DATE_FORMAT)
            self.endDate = datetime(*time_tuple[0:3])

        if options.doseCountLimit is not None:
            self.doseCountLimit = int(options.doseCountLimit)

        # The flag was previously never copied over, so requesting mixture normalization on the
        #   command line had no effect.  getattr keeps options objects without the attribute working,
        #   and an unset flag leaves the current value alone.
        if getattr(options, "normalizeMixtures", None):
            self.normalizeMixtures = True


if __name__ == "__main__":
    # Script entry point: build the converter and hand it the raw command line
    STARROrderMedConversion().main(sys.argv)