#!/usr/bin/env python
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 ai :
"""This script compares sources common to two user-specified sourcelists and displays various measures of their
differences. The differences can be calculated using one of three user-selectable methods:

* Absolute: a simple difference, i.e. Comparison - Reference.
* Overall mean percent difference: percent difference computed relative to the **overall mean** reference value, i.e. ((Comparison - Reference)/mean_overall(Reference)) x 100.
* Dynamic percent difference: percent difference computed discretely for each comparison/reference value pair, i.e. ((Comparison[n] - Reference[n])/Reference[n]) x 100.
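
As a worked illustration of the three modes (values chosen arbitrarily), consider a single matched pair with
Reference = 10.0 and Comparison = 10.5, where the overall mean of all reference values is 20.0::

    absolute  = 10.5 - 10.0                   # = 0.5
    pmean     = ((10.5 - 10.0)/20.0) x 100    # = 2.5 (percent of the overall reference mean)
    pdynamic  = ((10.5 - 10.0)/10.0) x 100    # = 5.0 (percent of the individual reference value)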

3x3-sigma clipped mean, median and standard deviation, and non-sigma clipped min and max values are
computed for the following:

* Image X position (in pixels)
* Image Y position (in pixels)
* Right Ascension (in arcseconds)
* Declination (in arcseconds)
* Flux (Inner Aperture)
* Flux (Outer Aperture)
* Magnitude (Inner Aperture)
* Magnitude (Outer Aperture)
* Magnitude Error (Inner Aperture)
* Magnitude Error (Outer Aperture)
* MSKY
* STDDEV
* CI (Concentration Index)

Absolute bit-wise comparisons are also performed for the following item:

* Flag Value

.. note::
    Not all sourcelist types compatible with this comparison script contain all of the columns listed above.
    Statistics (and optionally plots) can only be generated for columns common to both user-specified sourcelists.
    Thus, it is to be expected that not all runs will yield comparisons for all columns listed above.



Regression Testing
------------------
**All** of the following criteria must be met for the test to be declared "successful":

* All non-flag (linear) column comparisons: No more than 5% of all comparison - reference difference values lie more than 3 sigma from the sigma-clipped mean, and the absolute value of the sigma-clipped mean difference does not exceed the column-specific tolerance defined in max_diff_dict (see comparesourcelists()):

    * X position: 0.1 pixels
    * Y position: 0.1 pixels
    * On-sky separation (Right Ascension and Declination, combined): 0.1 arcseconds
    * Flux (Inner Aperture): 1.0
    * Flux (Outer Aperture): 1.0
    * Magnitude (Inner Aperture): 0.1
    * Magnitude (Outer Aperture): 0.1
    * Magnitude error (Inner Aperture): 0.05
    * Magnitude error (Outer Aperture): 0.05

* Flag Value: Less than 5% of all matched sources have differing flag values.

.. note::
    Sigma-clipped values for mean, sigma, and median are computed using the astropy.stats.sigma_clipped_stats() routine with three rounds of three-sigma clipping.
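
For reference, a minimal sketch of that call as used in this script (``diff_values`` stands in for any of the
comparison - reference difference arrays)::

    from astropy.stats import sigma_clipped_stats

    clipped_mean, clipped_median, clipped_stddev = sigma_clipped_stats(diff_values, sigma=3, maxiters=3)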


Plots
-----
Plots will be generated if the optional plot input is set to 'screen' or 'file'. If set to 'screen', the plots will
simply be displayed on-screen and not written to disk. If set to 'file', a plot and a statistical summary (on a second
page) are generated for each valid comparison, and all plots and summaries are saved to a single multi-page pdf file.
The default name of the combined plot pdf file is 'combined_plot.pdf'. If a user-specified filename prefix string was
provided, it will be prepended to the default name.

The code generates up to three different types of plots:

* Difference histogram plots: These are the most commonly generated plot products and are produced for all column
  comparisons except for the bit-wise comparison.
* Bit value barcharts
* X-Y absolute difference vector plot


Path
----
drizzlepac/drizzlepac/devutils/comparison_tools/compare_sourcelists.py

Dependencies
------------
- drizzlepac/drizzlepac/devutils/comparison_tools/starmatch_hist.py
- The `PyPDF2 <https://pypi.org/project/PyPDF2/>`_ python library

Inputs
------
* Required input
    1. *sourcelistNames*
        * A space-separated pair of sourcelists to compare. The first sourcelist is assumed to be the reference sourcelist that the second is being compared to.

* Optional inputs:
    #. -d *debugMode*
        * Perform additional match quality diagnostics
        * Input choices: "True" or "False"
        * Default value: False

    #. -i *imageNames*
        * A space-separated list of the fits images that were used to generate the input sourcelists. The first image corresponds to the first listed sourcelist, and so on. These will be used to improve the sourcelist alignment and matching.

    #. -m *diffMode*
        * How should the comp-ref difference be calculated? "absolute" is simply the straight comp-ref difference. "pmean" is the mean percent difference ((C-R)/avg(R)) x 100. "pdynamic" is the dynamic percent difference ((C-R)/R) x 100
        * Input choices: "absolute", "pmean" or "pdynamic"
        * Default value: "pmean"

    #. -p *plotGen*
        * Generate plots, and if so, how should they be output?
        * Input choices: "none", "screen" or "file"
        * Default value: "none"

    #. -s *plotfile_prefix_string*
        * Text string that will prepend the plot files generated if plots are written to files ***REQUIRES the -p option set to 'file'***
        * Default value: blank text string ('')

    #. -v *verbose*
        * Display verbose output?
        * Input choices: "True" or "False"
        * Default value: True
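
Example
-------
A representative command-line invocation (the catalog and image filenames here are purely illustrative)::

    python compare_sourcelists.py ref_sourcelist.ecsv comp_sourcelist.ecsv -i ref_image.fits comp_image.fits -m pmean -p file -s run1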

Classes and Functions
---------------------
"""
import argparse
import collections
from datetime import datetime
import os
import pdb
import random
import sys

from astropy.io import fits
from astropy.stats import sigma_clipped_stats
from astropy.coordinates import SkyCoord
from astropy.table import Table

import matplotlib.pyplot as plt
import numpy as np
from PyPDF2 import PdfFileMerger

from drizzlepac.haputils import diagnostic_utils
from drizzlepac.devutils.comparison_tools import starmatch_hist
from stsci.tools import logutil
from stwcs import wcsutil

__taskname__ = 'compare_sourcelists'

MSG_DATEFMT = '%Y%j%H%M%S'
SPLUNK_MSG_FORMAT = '%(asctime)s %(levelname)s src=%(name)s- %(message)s'
log = logutil.create_logger(__name__, level=logutil.logging.NOTSET, stream=sys.stdout, format=SPLUNK_MSG_FORMAT,
                            datefmt=MSG_DATEFMT)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-


def check_match_quality(matched_x_list, matched_y_list):
    """Creates region file to check quality of source matching.

    Parameters
    ----------
    matched_x_list : list
        list of ref and comp x coords for matched sources

    matched_y_list : list
        list of ref and comp y coords for matched sources

    Returns
    -------
    Nothing.
    """
    out_filename = "match_check.reg"
    num_display = 5000  # Number of pairs to plot
    list_length = len(matched_x_list[0])
    if num_display > list_length:  # if the list of matched sources is smaller than num_display, just use all matched pairs, rather than a randomly selected subset.
        index_list = np.arange(list_length)
    else:
        index_list = random.sample(range(list_length), num_display)  # sample over all indices, including 0
    with open(out_filename, "w") as fout:
        for index_no in index_list:
            fout.write("circle({},{},10)  # color=green\n".format(matched_x_list[0][index_no], matched_y_list[0][
                index_no]))  # write ref source circle
            fout.write("circle({},{},10) # color=red\n".format(matched_x_list[1][index_no],
                                                               matched_y_list[1][index_no]))  # write comp source circle
            fout.write(
                "line({},{},{},{}) # color=blue\n".format(matched_x_list[0][index_no], matched_y_list[0][index_no],
                                                          matched_x_list[1][index_no],
                                                          matched_y_list[1][index_no]))  # write line connecting the two
    log.info("Wrote region file {}".format(out_filename))


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-
def computeFlagStats(matchedRA, max_diff, plotGen, plot_title, plotfile_prefix, catalog_names, verbose):
    """Compute and report statistics on the differences in flagging.

    Parameters
    ----------
    matchedRA : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Row 1: matched reference flag values.
        Row 2: the corresponding matched comparison flag values

    max_diff : float
        Maximum allowable percentage of all matched sources with differences in their flag values for comparison to be
        declared a success

    plotGen : str
        Plot generation mode ("none", "screen" or "file")

    plot_title : str
        text string that will be used in plot title.

    plotfile_prefix : str
        text string that will prepend the plot files generated if plots are written to files

    catalog_names : list
        list of the sourcelist filenames used as the comparison and the reference

    verbose : bool
        display verbose output?

    Returns
    -------
    regTestStatus : str
        overall test result and statistics

    pdf_file_list : list
        list of pdf files generated by this routine (empty if no plot files were written)
    """
    pdf_file_list = []
    log.info(">>>>>> Comparison - reference sourcelist {} differences <<<<<<".format(plot_title))
    # compute overall percentage of matched sources with flagging differences
    flag_diff_list = list(matchedRA[0] - matchedRA[1])
    n_total = len(matchedRA[0])
    n_unchanged = flag_diff_list.count(0)
    n_changed = n_total - n_unchanged
    pct_changed = (float(n_changed) / float(n_total)) * 100.0

    # set up arrays to count stuff up
    bit_list = [0, 1, 2, 4, 8, 16, 32, 64, 128]
    refFlagBreakdown = np.zeros(9, dtype=int)
    compFlagBreakdown = np.zeros(9, dtype=int)
    unchangedFlagBreakdown = np.zeros(9, dtype=int)
    on_off_FlagFlips = np.zeros(9, dtype=int)
    off_on_FlagFlips = np.zeros(9, dtype=int)
    for refFlag, compFlag in zip(matchedRA[0], matchedRA[1]):
        # break down each flag value into component bit values, add values to totals
        refFlagRA = deconstruct_flag(refFlag)
        compFlagRA = deconstruct_flag(compFlag)
        refFlagBreakdown += refFlagRA
        compFlagBreakdown += compFlagRA
        # find differences in flagging, total up which bits were turned on, which were turned off.
        diffFlagRA = compFlagRA - refFlagRA
        if not np.array_equal(refFlagRA, compFlagRA):
            off_on_FlagFlips[np.where(diffFlagRA == 1)] += 1  # bits that are off in ref but on in comp
            on_off_FlagFlips[np.where(diffFlagRA == -1)] += 1  # bits that are on in ref but off in comp
            unchangedFlagBreakdown[np.where((refFlagRA == 1) & (
                        compFlagRA == 1))] += 1  # takes care of the case where comp and ref have differing bits, but also have additional bits that are unchanged.
        if np.array_equal(refFlagRA, compFlagRA):  # if there are absolutely no differences
            unchangedFlagBreakdown += refFlagRA
    regTestStatus = "OK     "
    if pct_changed > max_diff:
        regTestStatus = "FAILURE"
    log_output_string_list = []

    tot_str_len = len(str(n_total))
    uch_padding = tot_str_len - len(str(n_unchanged))
    ch_padding = tot_str_len - len(str(n_changed))
    if ((verbose is True) or (regTestStatus == "FAILURE")):
        # Generate result tables
        n = np.sum(refFlagBreakdown, dtype=float)
        log_output_string_list.append("        Statistical Breakdown of Reference List Flagging Differences")
        log_output_string_list.append("  Flagging differences by number         Flagging differences by percentage")
        log_output_string_list.append(
            "--------------------------------------------------------------------------------")
        log_output_string_list.append("%5s%9s%12s%10s%5s%5s %9s %12s %10s" % ("FLAG", "# TOTAL", "# UNCHANGED", "# ON->OFF", "  |  ", "FLAG", "% TOTAL", "% UNCHANGED", "% ON->OFF"))
        for ctr in range(0, len(bit_list)):
            log_output_string_list.append("%5d%9d%12d%10d%5s%5d  %8.4f %12.4f %10.4f" % (bit_list[ctr],
                                                                                         refFlagBreakdown[ctr],
                                                                                         unchangedFlagBreakdown[ctr],
                                                                                         on_off_FlagFlips[ctr],
                                                                                         "  |  ",
                                                                                         bit_list[ctr],
                                                                                         (float(refFlagBreakdown[ctr]) / n) * 100.0,
                                                                                         (float(unchangedFlagBreakdown[ctr]) / n) * 100.0,
                                                                                         (float(on_off_FlagFlips[ctr]) / n) * 100.0))
        log_output_string_list.append("%5s%9d%12d%10d%5s%5s  %8.4f %12.4f %10.4f" % ("TOTAL",
                                                                                     np.sum(refFlagBreakdown),
                                                                                     np.sum(unchangedFlagBreakdown),
                                                                                     np.sum(on_off_FlagFlips),
                                                                                     "  |  ",
                                                                                     "TOTAL",
                                                                                     (float(np.sum(refFlagBreakdown)) / n) * 100.0,
                                                                                     (float(np.sum(unchangedFlagBreakdown)) / n) * 100.0,
                                                                                     (float(np.sum(on_off_FlagFlips)) / n) * 100.0))
        log_output_string_list.append("\n")
        n = np.sum(compFlagBreakdown, dtype=float)
        log_output_string_list.append("        Statistical Breakdown of Comparison List Flagging Differences")
        log_output_string_list.append("  Flagging differences by number         Flagging differences by percentage")
        log_output_string_list.append("--------------------------------------------------------------------------------")
        log_output_string_list.append("%5s%9s%12s%10s%5s%5s %9s %12s %10s" % ("FLAG", "# TOTAL", "# UNCHANGED", "# OFF->ON", "  |  ", "FLAG", "% TOTAL", "% UNCHANGED", "% OFF->ON"))
        for ctr in range(0, len(bit_list)):
            log_output_string_list.append("%5d%9d%12d%10d%5s%5d  %8.4f %12.4f %10.4f" % (bit_list[ctr], compFlagBreakdown[ctr], unchangedFlagBreakdown[ctr], off_on_FlagFlips[ctr], "  |  ", bit_list[ctr], (float(compFlagBreakdown[ctr]) / n) * 100.0, (float(unchangedFlagBreakdown[ctr]) / n) * 100.0, (float(off_on_FlagFlips[ctr]) / n) * 100.0))
        log_output_string_list.append("%5s%9d%12d%10d%5s%5s  %8.4f %12.4f %10.4f" % ("TOTAL", np.sum(compFlagBreakdown), np.sum(unchangedFlagBreakdown), np.sum(off_on_FlagFlips), "  |  ", "TOTAL", (float(np.sum(compFlagBreakdown)) / n) * 100.0, (float(np.sum(unchangedFlagBreakdown)) / n) * 100.0, (float(np.sum(off_on_FlagFlips)) / n) * 100.0))
        log_output_string_list.append("\n")
        log_output_string_list.append(
            "Total flag bit differences......... {}\n".format(np.sum([off_on_FlagFlips, on_off_FlagFlips])))
    log_output_string_list.append(
        "{}{}".format(" " * 8, "Overall Percentage of Matched Sources with Flagging Differences"))
    log_output_string_list.append(
        "Total number of matched sources with unchanged flag values...... {}{}".format(" " * uch_padding, n_unchanged))
    log_output_string_list.append(
        "Total number of matched sources with flag value differences..... {}{}".format(" " * ch_padding, n_changed))
    log_output_string_list.append("Total number of matched sources................................. {}".format(n_total))
    log_output_string_list.append(
        "Percentage of all matched sources with flag value differences... {:7.4f}%".format(pct_changed))
    log_output_string_list.append("Regression test status............. {}".format(regTestStatus))

    for log_line in log_output_string_list:
        if log_line == "\n":
            log.info("")
        else:
            log.info(log_line)

    if plotGen != "none":
        idx = np.arange(9)
        x_title_list = []
        for bit in bit_list:
            x_title_list.append(str(bit))

        # plot flag breakdown by bit for all matched sources in the reference and comparison sourcelists
        width = 0.35
        fig = plt.figure(figsize=(11, 8.5))
        ax1 = fig.add_subplot(111)
        plt.bar(idx - width / 2, refFlagBreakdown, width, label='Reference')
        plt.bar(idx + width / 2, compFlagBreakdown, width, label='Comparison')
        plt.legend()
        plt.xticks(idx, x_title_list)
        plt.xlabel("Flag Bit Value")
        plt.ylabel("Number of matched sources")
        fullPlotTitle = "Flag Breakdown by Bit"
        plt.title(fullPlotTitle)

        if plotGen == "screen":
            plt.show()
            plotFileName = ""
            stat_file_name = ""
        if plotGen == "file":
            # Put timestamp and plotfile_prefix text string in lower left corner below plot
            timestamp = "Generated {}".format(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))
            plt.text(0.0, -0.091, "{}\nComparison Sourcelist: {}\nReference Sourcelist:    {}".format(timestamp,catalog_names[1], catalog_names[0]), horizontalalignment='left', verticalalignment='center', fontsize=5, transform=ax1.transAxes)
            plotFileName = "{}_{}.pdf".format(plotfile_prefix, fullPlotTitle.replace(" ", "_"))
            if plotFileName.startswith("_"):
                plotFileName = plotFileName[1:]
            plt.savefig(plotFileName)
            plt.close()
            # generate second pdf page with statistics
            stat_file_name = plotFileName.replace(".pdf", "_stats.pdf")
            fig = plt.figure(figsize=(11, 8.5))
            fig.text(0.5, 0.92, fullPlotTitle, transform=fig.transFigure, size=12, ha="center")
            stat_text_blob = ""
            for log_line in log_output_string_list:
                if log_line != "\n":
                    stat_text_blob = "{}{}\n".format(stat_text_blob, log_line)
                else:
                    stat_text_blob += "\n"
            stat_text_blob += "\n" + timestamp + "\n"
            stat_text_blob = wrap_long_filenames_on_stats_page(stat_text_blob, catalog_names)
            fig.text(0.5, 0.5, stat_text_blob, transform=fig.transFigure, size=10, ha="center", va="center",
                     multialignment="left", family="monospace")
            fig.savefig(stat_file_name)
            plt.close()

        pdf_file_list += [plotFileName, stat_file_name]

        # plot flag changes broken down by bit
        fig = plt.figure(figsize=(11, 8.5))
        ax2 = fig.add_subplot(111)
        p_unchanged = plt.bar(idx, unchangedFlagBreakdown)
        p_offOn = plt.bar(idx, off_on_FlagFlips, bottom=unchangedFlagBreakdown)
        p_onOff = plt.bar(idx, on_off_FlagFlips, bottom=off_on_FlagFlips + unchangedFlagBreakdown)
        plt.xticks(idx, x_title_list)
        plt.xlabel("Flag Bit Value")
        plt.ylabel("Number of matched sources")
        fullPlotTitle = "Flagging Differences by Bit"
        plt.title(fullPlotTitle)
        plt.legend((p_onOff[0], p_offOn[0], p_unchanged[0]), ("On -> Off", "Off -> On", "Unchanged"))
        if plotGen == "screen":
            plt.show()
            plotFileName = ""
        if plotGen == "file":
            # Put timestamp and plotfile_prefix text string in lower left corner below plot
            timestamp = "Generated {}".format(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))
            plt.text(0.0, -0.091, "{}\nComparison Sourcelist: {}\nReference Sourcelist:    {}".format(timestamp,catalog_names[1], catalog_names[0]), horizontalalignment='left', verticalalignment='center', fontsize=5, transform=ax2.transAxes)
            plotFileName = "{}_{}.pdf".format(plotfile_prefix, fullPlotTitle.replace(" ", "_"))
            if plotFileName.startswith("_"):
                plotFileName = plotFileName[1:]
            plt.savefig(plotFileName)
            plt.close()
        pdf_file_list.append(plotFileName)

    regTestStatus = "%s %11.7f" % (regTestStatus, pct_changed)
    return (regTestStatus, pdf_file_list)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def computeLinearStats(matchedRA, max_diff, x_axis_units, plotGen, plot_title, plotfile_prefix, catalog_names,
                       verbose):
    """Compute stats on the quantities with differences that can be computed with simple subtraction 
    (X, Y, RA, Dec, Flux, and Magnitude).

    Parameters
    ----------  
    matchedRA : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Column 1: matched reference values. Column 2: The corresponding matched
    comparison values

    max_diff : float
        Maximum allowable absolute value of the sigma-clipped mean difference for the comparison to be declared a success

    x_axis_units : str
        units to display on plot X-axis title

    plotGen : str
        Plot generation mode ("none", "screen" or "file")

    plot_title : str
        text string that will be used in plot title.

    plotfile_prefix : str
        text string that will prepend the plot files generated if plots are written to files

    catalog_names : list
        list of the sourcelist filenames used as the comparison and the reference

    verbose : bool
        display verbose output?

    Returns
    -------
    regTestStatus : str
        overall test result and statistics

    pdf_files : list
        list of pdf files generated by this routine (empty if no plot files were written)
    """
    log.info(">>>>>> Comparison - reference sourcelist {} absolute differences <<<<<<".format(plot_title))
    if plot_title != "On-Sky Separation":
        # remove any "inf" or "nan" values in matchedRA.
        nanIdx = np.where(np.isnan(matchedRA) == True)[1]
        if len(nanIdx) > 0:
            if verbose:
                log.info("{} np.nan values will be removed from input list. New input list contains {} values.".format(len(nanIdx), len(matchedRA[0, :]) - len(nanIdx)))
            matchedRA = np.delete(matchedRA, nanIdx, axis=1)

        infIdx = np.where(np.isinf(matchedRA) == True)[1]
        if len(infIdx) > 0:
            if verbose:
                log.info("{} np.inf values will be removed from input list. New input list contains {} values.".format(len(infIdx), len(matchedRA[0, :]) - len(infIdx)))
            matchedRA = np.delete(matchedRA, infIdx, axis=1)

    # 'sigma' and 'maxiters' input values used for the astropy.stats.sigma_clipped_stats() run below
    sigVal = 3
    intersVal = 3
    if plot_title == "On-Sky Separation":
        diffRA = matchedRA[1].separation(matchedRA[0]).arcsec #convert separations from degrees to arcseconds
    else:
        diffRA = matchedRA[1, :] - matchedRA[0, :]
    clippedStats = sigma_clipped_stats(diffRA, sigma=sigVal, maxiters=intersVal)
    sigma_percentages = []
    for sig_val in [1.0, 2.0, 3.0]:
        sigma_percentages.append((float(np.shape(np.where((diffRA >= (clippedStats[0] - sig_val * clippedStats[2])) & (diffRA <= (clippedStats[0] + sig_val * clippedStats[2]))))[1])/float(np.shape(diffRA)[0])) * 100.0)

    out_stats = "%11.7f %11.7f  %11.7f  %11.7f  %11.7f " % (clippedStats[0], clippedStats[1], clippedStats[2], sigma_percentages[2], 100.0-sigma_percentages[2])

    if sigma_percentages[2] >= 95.0 and abs(clippedStats[0]) <= max_diff:  # success condition: at least 95% of all difference values lie within 3 sigma of the sigma-clipped mean AND the sigma-clipped mean is within the allowed tolerance
        regTestStatus = "OK      "
    else:
        regTestStatus = "FAILURE "
    log_output_string_list = []  # initialized unconditionally so the status lines appended after this block never reference an undefined name
    if verbose or regTestStatus == "FAILURE ":
        log_output_string_list.append("            Non-Clipped Statistics")
        log_output_string_list.append("Non-clipped minimum....................... {}".format(np.min(diffRA)))
        log_output_string_list.append("Non-clipped maximum....................... {}".format(np.max(diffRA)))
        log_output_string_list.append("Non-clipped mean.......................... {}".format(np.mean(diffRA)))
        log_output_string_list.append("Non-clipped median........................ {}".format(np.median(diffRA)))
        log_output_string_list.append("Non-clipped standard deviation............ {}".format(np.std(diffRA)))
        log_output_string_list.append(
            "Non-clipped mean in units of SD........... {}\n".format(np.divide(np.mean(diffRA), np.std(diffRA))))
        log_output_string_list.append("       Sigma-clipped Statistics; \u03C3 = {}, Number of clipping steps = {}".format(sigVal, intersVal))
        log_output_string_list.append("Sigma-clipped mean........................ {}".format(clippedStats[0]))
        log_output_string_list.append("Sigma-clipped median...................... {}".format(clippedStats[1]))
        log_output_string_list.append("Sigma-clipped standard deviation.......... {}".format(clippedStats[2]))
        for sig_val, pct_val in zip([1.0, 2.0, 3.0], sigma_percentages):
            log_output_string_list.append("% all diff values within {}\u03C3 of mean....... {}%".format(int(sig_val),pct_val))

    log_output_string_list.append("Regression test status.................... {}".format(regTestStatus))
    log_output_string_list.append("\n")


    if plotGen == "none":
        pdf_files = []
        log.info("\n")
        for log_line in log_output_string_list:
            if log_line == "\n":
                log.info("")
            else:
                log.info(log_line)

    else:

        xAxisString = "{}".format(plot_title.split(" ")[0])
        plot_nsigma_cutoff = 10.0
        plotCutoff = (plot_nsigma_cutoff * np.abs(clippedStats[2])) + np.abs(clippedStats[0])
        if plotCutoff != 0.0:
            origSize = len(diffRA)
            log_output_string_list.append("Plot limits: Sigma-clipped mean\u00B1{}\u03C3".format(plot_nsigma_cutoff))
            goodIdx = np.where((diffRA >= (clippedStats[0] - plot_nsigma_cutoff * clippedStats[2])) & (diffRA <= (clippedStats[0] + plot_nsigma_cutoff * clippedStats[2])))
            good_diffRA = diffRA[goodIdx]
            log_output_string_list.append("%d values (%7.4f percent) clipped from plot." % (origSize - len(good_diffRA), (float(origSize - len(good_diffRA)) / float(origSize)) * 100.0))
        else:  # degenerate case: clipped mean and sigma are both zero, so nothing is excluded from the plot
            goodIdx = np.arange(len(diffRA))
            good_diffRA = diffRA
        fig = plt.figure(figsize=(11, 8.5))
        ax1 = fig.add_subplot(111)
        if plot_title == "On-Sky Separation":
            fullPlotTitle = "Comparison - reference combined RA & Dec on-sky separation absolute differences"
        else:
            fullPlotTitle = "Comparison - reference sourcelist %s absolute differences" % (plot_title)
        plt.title(fullPlotTitle)
        bins = "auto"
        ax1.hist(good_diffRA, bins=bins)
        ax1.axvline(x=clippedStats[0], color='k', linestyle='--')
        ax1.axvline(x=clippedStats[0] + 3.0*clippedStats[2], color='k', linestyle=':')
        ax1.axvline(x=clippedStats[0] - 3.0*clippedStats[2], color='k', linestyle=':')
        if plot_title == "On-Sky Separation":
            ax1.set_xlabel("Combined RA & Dec on-sky separation (arcseconds)")
        else:
            ax1.set_xlabel("$\Delta {}$ ({})".format(xAxisString, x_axis_units))
        ax1.set_ylabel("Number of matched sources")

        ax2 = ax1.twinx()
        ax2.hist(good_diffRA, bins=bins, cumulative=-1, density=True, histtype='step', color='r')
        ax2.set_ylabel("Fraction of all matched sources", color='r')
        for tl in ax2.get_yticklabels():
            tl.set_color('r')

        log.info("\n")
        for log_line in log_output_string_list:
            if log_line == "\n":
                log.info("")
            else:
                log.info(log_line)

        if plotGen == "screen":
            plt.show()
            plotFileName = ""
            stat_file_name = ""
        if plotGen == "file":
            # Put timestamp and plotfile_prefix text string in lower left corner below plot
            timestamp = "Generated {}".format(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))
            plt.text(0.0, -0.091, "{}\nComparison Sourcelist: {}\nReference Sourcelist:    {}\nDashed and dotted lines indicate 3 x 3\u03C3-clipped mean and \u00B13\u03C3 confidence limits respectively\nNOTE: Values beyond \u00B110\u03C3 from the clipped mean value were excluded from the plot for the sake of readability".format(timestamp, catalog_names[1], catalog_names[0]), horizontalalignment='left',
                     verticalalignment='center', fontsize=5, transform=ax1.transAxes)
            plotFileName = "{}_{}.pdf".format(plotfile_prefix, plot_title.replace(" ", "_"))
            if plotFileName.startswith("_"):
                plotFileName = plotFileName[1:]
            plt.savefig(plotFileName)
            plt.close()
            # generate second pdf page with statistics
            stat_file_name = plotFileName.replace(".pdf", "_stats.pdf")
            fig = plt.figure(figsize=(11, 8.5))
            fig.text(0.5, 0.87, fullPlotTitle[:-1] + " statistics", transform=fig.transFigure, size=12, ha="center")
            stat_text_blob = ""
            for log_line in log_output_string_list:
                if log_line != "\n":
                    stat_text_blob += log_line + "\n"
                else:
                    stat_text_blob += "\n"
            stat_text_blob += "\n" + timestamp + "\n"
            stat_text_blob = wrap_long_filenames_on_stats_page(stat_text_blob, catalog_names)
            fig.text(0.5, 0.5, stat_text_blob, transform=fig.transFigure, size=10, ha="center", va="center",
                     multialignment="left", family="monospace")
            fig.savefig(plotFileName.replace(".pdf", "_stats.pdf"))
            plt.close()
        pdf_files = [plotFileName, stat_file_name]

        # make mag difference vs. mag_comp plot plot
        if plot_title.startswith("Magnitude") and plot_title.endswith("Aperture)"):
            fig = plt.figure(figsize=(11, 8.5))
            ax1 = fig.add_subplot(111)
            plt.scatter(matchedRA[1][goodIdx],diffRA[goodIdx],marker=".",s=10,color="blue")
            plt.axhline(y=clippedStats[0], color='k', linestyle='--')
            ax1.axhline(y=clippedStats[0] + 3.0 * clippedStats[2], color='k', linestyle=':')
            ax1.axhline(y=clippedStats[0] - 3.0 * clippedStats[2], color='k', linestyle=':')
            ax1.set_title("Magnitude vs. Comparison - reference magnitude difference {}".format(plot_title.replace("Magnitude ","")))
            ax1.set_xlabel("Comparison magnitude (ABMAG)")
            ax1.set_ylabel("Comparison - reference difference (ABMAG)")
            ax1.grid(True)

            if plotGen == "screen":
                plt.show()
                plotFileName = ""

            if plotGen == "file":
                # Put timestamp and plotfile_prefix text string in lower left corner below plot
                timestamp = "Generated {}".format(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))
                plt.text(0.0, -0.091, "{}\nComparison Sourcelist: {}\nReference Sourcelist:    {}\nDashed and dotted lines indicate 3 x 3\u03C3-clipped mean and \u00B13\u03C3 confidence limits respectively\nNOTE: Values beyond \u00B110\u03C3 from the clipped mean value were excluded from the plot for the sake of readability".format(timestamp,catalog_names[1], catalog_names[0]), horizontalalignment='left', verticalalignment='center', fontsize=5, transform=ax1.transAxes)
                # file output
                magvsdmag_filename = plotFileName.replace("Magnitude_(", "Magnitude_vs_dmag_(")
                fig.savefig(magvsdmag_filename)
                pdf_files.insert(-1,magvsdmag_filename)
    log.info("\n")

    return (regTestStatus + out_stats, pdf_files)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def deconstruct_flag(flagval):
    """Breaks down an integer flag value into individual component bit values.

    Parameters
    ----------
    flagval : int
        Flag value to deconstruct
    
    Returns
    -------
    out_idx_list : numpy.ndarray
        a 9-element numpy array of 0s and 1s. Element 0 represents a flag value of 0 (no bits set); each subsequent
        element represents the presence of a particular bit value (element 1 = bit 1, element 2 = bit 2,
        element 3 = bit 4, element 4 = bit 8, and so on)
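
    A minimal usage sketch (flag value 5 decomposes into bits 1 and 4)::

        deconstruct_flag(5)   # -> array([0, 1, 0, 1, 0, 0, 0, 0, 0])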
    """
    bit_list = [1, 2, 4, 8, 16, 32, 64, 128]
    flagval = int(flagval)
    # out_bit_list = []
    out_idx_list = np.zeros(9, dtype=int)
    if flagval == 0:
        # out_bit_list = [0]
        out_idx_list[0] = 1
    if flagval > 0:
        idx = 1
        for bit in bit_list:
            if flagval & bit > 0:
                # out_bit_list.append(bit)
                out_idx_list[idx] = 1
            if bit > flagval:
                break
            idx += 1
    return (out_idx_list)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-
def make_flag_mask(matched_flag_values, good_flag_sum, missing_mask):
    """Returns a list of the array index values to mask based on user-specified good flag value, and missing mask
    Parameters
    ----------
    matched_flag_values : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Column 1: matched reference values.
        Column 2: The corresponding matched comparison values

    good_flag_sum : int
        sum of flag bit values that should be considered "good" for masking purposes

    Returns
    -------
    masked_index_list : numpy list
        list of the array index values to mask
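
    A minimal sketch (values illustrative): with good_flag_sum = 5, only bits 1 and 4 are treated as "good", so any
    matched pair whose combined flags contain some other bit is masked::

        flags = np.array([[0, 1, 4],    # reference flag values
                          [0, 2, 4]])   # comparison flag values
        make_flag_mask(flags, 5, np.zeros(3, dtype=bool))   # -> array([False,  True, False])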
    """
    full_refFlag_list = []
    full_compFlag_list = []
    bitmask = missing_mask  # start from the missing-value mask; flag-based masking is added below
    if good_flag_sum != 255:
        good_bit_list = deconstruct_flag(good_flag_sum) # break good bit sum into list of component bits
        good_bit_list[0] = 1
        bad_bit_list = np.invert(good_bit_list.astype(bool)) # invert good bit list to make bad bit list
    ctr = 0
    for refFlagVal, compFlagVal in zip(matched_flag_values[0], matched_flag_values[1]):
        refFlag_list = deconstruct_flag(refFlagVal)  # break ref flag bit sum into list of component bits
        full_refFlag_list.append(refFlag_list)
        compFlag_list = deconstruct_flag(compFlagVal)  # break comp flag bit sum into list of component bits
        full_compFlag_list.append(compFlag_list)
        if good_flag_sum != 255:
            merged_flag_val = np.logical_or(refFlag_list, compFlag_list)  # merge comp and ref flag lists
            bitmask[ctr] = np.any(np.logical_and(merged_flag_val, bad_bit_list))  # mask if any of the bad bits are found in the merged comp+ref bit list

        ctr += 1

    masked_index_list = np.where(bitmask == True)
    log.info("{} of {} ({} %) values masked.".format(np.shape(masked_index_list)[1],ctr,
                                                     100.0*(float(np.shape(masked_index_list)[1])/float(ctr))))
    log.info("{} remain.".format(ctr-np.shape(masked_index_list)[1]))
    return bitmask


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-

def mask_missing_values(refData, compData, refLines, compLines, columns_to_compare):
    """Update the bitmask to include lines where any values are missing, nan, or inf from any column in either the comp or ref matched catalogs

    Parameters
    ----------
    refData : astropy Table object
        reference data table

    compData : astropy Table object
        comparison data table

    refLines : numpy.ndarray
        List of matching refData line numbers

    compLines : numpy.ndarray
        List of matching compData line numbers

    columns_to_compare : list
        list of columns that are common to both comparison and reference catalogs and will be used in the comparisons

    Returns
    -------
    out_mask : numpy.ndarray
        Updated boolean array where False corresponds to values to keep, and True corresponds to values to remove
    """
    out_mask = np.full(np.shape(refLines),0,dtype=bool)

    for col_title in columns_to_compare:
        matching_refData = refData[col_title][refLines].data
        matching_compData = compData[col_title][compLines].data
        for data_set in [matching_refData, matching_compData]: # merge together all input mask arrays
            if hasattr(data_set, "mask"):
                out_mask = np.logical_or(out_mask, data_set.mask)
            inf_nan_idx = np.where((np.isnan(data_set) == True) | (np.isinf(data_set) == True)) # identify any 'nan' or 'inf' values and flag them out as well
            for mask_idx in inf_nan_idx:
                out_mask[mask_idx] = True
    return out_mask


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-
def extractMatchedLines(col2get, refData, compData, refLines, compLines, bitmask=[]):
    """Extracts only matching lines of data for a specific column of refData and compData. Returns empty list if the
    specified column is not found in both tables.

    Parameters
    ----------
    col2get : str
        Title of the column to return

    refData : astropy Table object
        reference data table

    compData : astropy Table object
        comparison data table

    refLines : numpy.ndarray
        List of matching refData line numbers

    compLines : numpy.ndarray
        List of matching compData line numbers

    bitmask : numpy.ndarray, optional
        list of True/False values where False corresponds to values to keep, and True corresponds to values to remove

    Returns
    -------
    return_ra : numpy ndarray
        A 2 x len(refLines) sized numpy array. Row 1: matched reference values. Row 2: the corresponding matched
        comparison values
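
    A minimal sketch (tables and index arrays illustrative)::

        ref = Table({'X': [10.0, 20.0, 30.0]})
        comp = Table({'X': [10.1, 19.9, 30.2]})
        extractMatchedLines('X', ref, comp, np.array([0, 2]), np.array([0, 2]))
        # -> array([[10. , 30. ],
        #           [10.1, 30.2]])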
    """
    if col2get in list(refData.keys()) and col2get in list(compData.keys()):
        matching_refData = refData[col2get][refLines].data
        matching_compData = compData[col2get][compLines].data
        if len(bitmask) > 0:
            bitmask = bitmask.astype(int)
            matching_refData = np.ma.array(matching_refData, mask = bitmask)
            matching_compData = np.ma.array(matching_compData, mask = bitmask)
            matching_refData = matching_refData.compressed()
            matching_compData = matching_compData.compressed()
        return_ra = np.stack((matching_refData, matching_compData))
    else:
        return_ra = np.empty((2,0))
    return return_ra


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-

def getMatchedLists(slNames, imgNames, slLengths, log_level):
    """run starmatch_hist to get the indices of matching sources that are common to both input source catalogs

    Parameters
    ----------
    slNames : list
        list of input source lists

    imgNames : list
        list of input images

    slLengths : list
        list of integer sourcelist lengths

    log_level : int
        The desired level of verboseness in the log statements displayed on the screen and written to the .log file.

    Returns
    -------
    matching_lines_ref : list
        A list of the indices of reference sourcelist sources that match comparison sourcelist sources

    matching_lines_img : list
        A corresponding list of the indices of comparison sourcelist sources that match reference sourcelist sources
    """
    source_list_dict = {}
    equal_flag = False
    if slLengths[0] == slLengths[1]:
        slLengths[0] += 1
        equal_flag = True

    for ctr, slName in enumerate(slNames, 0):
        source_list_dict[slName] = slLengths[ctr]

    try:
        fh = fits.open(imgNames[0])
        xref = fh[0].header['crpix1']
        yref = fh[0].header['crpix2']
        fh.close()
    except Exception:
        log.info(
            "WARNING: Unable to fetch values for xref and yref from fits file headers. Using xref = 0.0 and yref = 0.0.")
        xref = 0.0
        yref = 0.0
    log.info("Summary of input catalog lengths")
    for source_list in source_list_dict.keys():
        log.info("{}: {}".format(os.path.basename(source_list), source_list_dict[source_list]))
    out_dict = starmatch_hist.run(source_list_dict, log_level, xref=xref, yref=yref)
    matching_lines_ref = out_dict[slNames[0]]
    matching_lines_img = out_dict[slNames[1]]

    if equal_flag:
        slLengths[0] -= 1
    # Report number and percentage of the total number of detected ref and comp sources that were matched
    log.info("Sourcelist Matching Results")
    log.info(
        "Reference sourcelist:  {} of {} total sources matched ({} %)".format(len(matching_lines_ref), slLengths[0],
                                                                              100.0 * (float(
                                                                                  len(matching_lines_ref)) / float(
                                                                                  slLengths[0]))))
    log.info(
        "Comparison sourcelist: {} of {} total sources matched ({} %)".format(len(matching_lines_img), slLengths[1],
                                                                              100.0 * (float(
                                                                                  len(matching_lines_img)) / float(
                                                                                  slLengths[1]))))

    return (matching_lines_ref, matching_lines_img)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-
def makeVectorPlot(x, y, plate_scale, plotDest, plotfile_prefix, catalog_names, binThresh=10000, binSize=250):
    """Generate vector plot of dx and dy values vs. reference (x,y) positions

    Parameters
    ----------
    x : numpy.ndarray
        A 2 x n sized numpy array. Column 1: matched reference X values. Column 2: The corresponding matched
        comparison X values

    y : numpy.ndarray
        A 2 x n sized numpy array. Column 1: matched reference Y values. Column 2: The corresponding matched
        comparison Y values

    plate_scale : float
        plate scale, in arcseconds/pixel

    plotDest : str
        plot destination; screen or file

    plotfile_prefix : str
        text string that will prepend the plot files generated if plots are written to files

    catalog_names : list
        list of the sourcelist filenames used as the comparison and the reference

    binThresh : int, optional
        Minimum size of list *x* and *y* that will trigger generation of a binned vector plot. Default value = 10000.

    binSize : int, optional
        Size of binning box in pixels. When generating a binned vector plot, mean dx and dy values are computed by
        taking the mean of all points located within the box. Default value = 250.

    Returns
    -------
    plotFileName : str
        name of the plot file written to disk (empty string if the plot was only displayed on-screen)
    """
    dx = plate_scale * (x[1, :] - x[0, :])
    dy = plate_scale * (y[1, :] - y[0, :])
    if len(dx) > binThresh:  # if the input list is larger than binThresh, a binned vector plot will be generated.
        binStatus = "%d x %d Binning" % (binSize, binSize)
        log.info("Input list length greater than threshold length value. Generating binned vector plot using %d pixel x %d pixel bins" % (binSize, binSize))
        if min(x[0, :]) < 0.0:
            xmin = min(x[0, :])
        else:
            xmin = 0.0
        if min(y[0, :]) < 0.0:
            ymin = min(y[0, :])
        else:
            ymin = 0.0

        p_x = np.empty(shape=[0])
        p_y = np.empty(shape=[0])
        p_dx = np.empty(shape=[0])
        p_dy = np.empty(shape=[0])
        color_ra = []
        for xBinCtr in range(int(round2ArbatraryBase(xmin, "down", binSize)),
                             int(round2ArbatraryBase(max(x[0, :]), "up", binSize)), binSize):
            for yBinCtr in range(int(round2ArbatraryBase(ymin, "down", binSize)),
                                 int(round2ArbatraryBase(max(y[0, :]), "up", binSize)), binSize):
                # define bin box x,y upper and lower bounds
                xBinMin = xBinCtr
                xBinMax = xBinMin + binSize
                yBinMin = yBinCtr
                yBinMax = yBinMin + binSize
                # get indicies of x and y within bounding box
                ix0 = np.where((x[0, :] >= xBinMin) & (x[0, :] < xBinMax) & (y[0, :] >= yBinMin) & (y[0, :] < yBinMax))
                if len(dx[ix0]) > 0 and len(dy[ix0]) > 0:  # ignore empty bins
                    p_x = np.append(p_x, xBinCtr + 0.5 * binSize)  # X and Y position at center of bin.
                    p_y = np.append(p_y, yBinCtr + 0.5 * binSize)
                    mean_dx = np.mean(dx[ix0])
                    p_dx = np.append(p_dx, mean_dx)  # compute mean dx, dy values
                    mean_dy = np.mean(dy[ix0])
                    p_dy = np.append(p_dy, mean_dy)
                    avg_npts = (float(len(dx[ix0])) + float(
                        len(dy[ix0]))) / 2.0  # keep an eye out for mean values computed from less than 10 samples.
                    if (
                            avg_npts < 10.0):  # if less than 10 samples were used in mean calculation, color the vector red.
                        color_ra.append('r')
                    else:
                        color_ra.append('k')
        lowSampleWarning = ""
        if "r" in color_ra:
            lowSampleWarning = "; Red Vectors were computed with less than 10 values"
    else:
        log.info("Generating unbinned vector plot")
        binStatus = "Unbinned"
        lowSampleWarning = ""
        color_ra = ["k"]
        p_x = x[0, :]
        p_y = y[0, :]
        p_dx = dx
        p_dy = dy
    plt_mean = np.mean(np.hypot(p_dx, p_dy))
    e = np.log10(5.0 * plt_mean).round()
    plt_scaleValue = 10 ** e
    fig = plt.figure(figsize=(11, 8.5))
    ax1 = fig.add_subplot(111)
    if len(dx) > binThresh:
        Q = plt.quiver(p_x, p_y, p_dx, p_dy, color=color_ra, units="xy")
    else:
        Q = plt.quiver(p_x, p_y, p_dx, p_dy)
    plt.quiverkey(Q, 0.75, 0.05, plt_scaleValue, r'%5.3f pixels' % (plt_scaleValue), labelpos='S', coordinates='figure', color="k")
    plot_title = "Comparison - reference $\Delta X$, $\Delta Y$ values vs. $(X_{ref}, Y_{ref})$ positions\n%s%s" % (binStatus, lowSampleWarning)
    plt.title(plot_title)
    plt.xlabel(r"$X_{ref}$ image position (pixels)")
    plt.ylabel(r"$Y_{ref}$ image position (pixels)")
    if plotDest == "screen":
        plt.show()
        plotFileName = ""
    if plotDest == "file":
        # Put timestamp and plotfile_prefix text string in lower left corner below plot
        timestamp = "Generated {}".format(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))
        plt.text(0.0, -0.081, "{}\nComparison Sourcelist: {}\nReference Sourcelist:    {}".format(timestamp, catalog_names[1], catalog_names[0]),horizontalalignment='left', verticalalignment='center', fontsize=5, transform=ax1.transAxes)
        plotFileName = "{}_xy_vector_plot.pdf".format(plotfile_prefix, plot_title.replace(" ", "_"))
        if plotFileName.startswith("_"):
            plotFileName = plotFileName[1:]
        plt.savefig(plotFileName)
        plt.close()
        log.info("Vector plot saved to file {}".format(plotFileName))
        log.info("\n")
    return plotFileName


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-
def round2ArbatraryBase(value, direction, roundingBase):
    """Round value up or down to arbitrary base

    Parameters
    ----------
    value : float
        Value to be rounded.

    direction : str
        Round up, down to the nearest base (choices: "up","down","nearest")

    roundingBase : int
        rounding base. (Example: if base = 5, values will be rounded to the nearest multiple of 5.)

    Returns
    -------
    rv : int or float
        rounded value.
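
    A few worked examples with roundingBase = 250::

        round2ArbatraryBase(237, "up", 250)       # -> 250  (237 + (250 - 237 % 250))
        round2ArbatraryBase(237, "down", 250)     # -> 0    (237 - 237 % 250)
        round2ArbatraryBase(237, "nearest", 250)  # -> 250  (int(250 * round(237 / 250)))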
    """
    if direction.lower().startswith("u"):
        rv = value + (roundingBase - value % roundingBase)  # round up to nearest base
    elif direction.lower().startswith("d"):
        rv = value - value % roundingBase  # round down to nearest base
    else:
        rv = int(roundingBase * round(float(value) / roundingBase))  # round up or down to nearest base
    return rv


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-

def comparesourcelists(slNames=None, imgNames=None, good_flag_sum = 255, plotGen=None, plotfile_prefix=None,
                       verbose=False, json_timestamp=None, json_time_since_epoch=None,
                       log_level=logutil.logging.NOTSET, debugMode=False, input_json_filename=None,
                       output_json_filename=None):
    """Main calling subroutine to compare sourcelists.

    Parameters
    ----------
    slNames : list, optional
        list of input source lists

    imgNames : list, optional
        optional list of input images that starmatch_hist will use to improve sourcelist matching

    good_flag_sum : int, optional
        sum of "good" flag bit values that will be used to mask matched sources based on their flag values

    plotGen : str, optional
        Plot generation mode ("none", "screen" or "file")

    plotfile_prefix : str, optional
        text string that will prepend the plot files generated if plots are written to files

    verbose : bool, optional
        display verbose output? Default value is False.

    json_timestamp: str, optional
        Universal .json file generation date and time (local timezone) that will be used in the instantiation
        of the HapDiagnostic object. Format: MM/DD/YYYYTHH:MM:SS (Example: 05/04/2020T13:46:35). If not
        specified, default value is logical 'None'

    json_time_since_epoch : float
        Universal .json file generation time that will be used in the instantiation of the HapDiagnostic
        object. Format: Time (in seconds) elapsed since January 1, 1970, 00:00:00 (UTC). If not specified,
        default value is logical 'None'

    log_level : int, optional
        The desired level of verboseness in the log statements displayed on the screen and written to the .log file.
        Default value is 'NOTSET'.

    debugMode : bool, optional
        executes subroutine check_match_quality(), which writes the matched sources (x, y) coordinates of the
        comparison and reference source lists to ds9 region files for follow-up human visual inspection, and
        write_matched_catalogs() which generates abbreviated versions of the input catalogs that only contain matched
        sources. Default value is False.

    input_json_filename : str, optional
        Name of an existing diagnostic_utils .json file from which previously matched catalog values will be read
        (instead of re-running the catalog matching), so that an earlier compare_sourcelists run can be reproduced.

    output_json_filename : str, optional
        Name of the output diagnostic_utils json file that all matched column values from the input sourcelists will be
        written to so that this compare_sourcelist.py run can be duplicated in the future. If not specified, no json
        file will be created.

    Returns
    -------
    overallStatus : str
        "OK" if all tests were passed, or "FAILURE" if inconsistencies were found.
    """
    log.setLevel(log_level)
    if not plotfile_prefix:
        plotfile_prefix = ""
    regressionTestResults = {}
    colTitles = []
    pdf_file_list = []



    # -1: define dictionary of max allowable mean sigma-clipped difference values
    max_diff_dict = {"X Position": 0.1,  # TODO: Initial "good" value. Optimize as necessary later
                     "Y Position": 0.1,  # TODO: Initial "good" value. Optimize as necessary later
                     "On-Sky Separation" : 0.1,  # TODO: Initial "good" value. Optimize as necessary later
                     "Flux (Inner Aperture)": 1.0,  # TODO: Initial "good" value. Optimize as necessary later
                     "Flux (Outer Aperture)": 1.0,  # TODO: Initial "good" value. Optimize as necessary later
                     "Magnitude (Inner Aperture)": 0.1,  # TODO: Initial "good" value. Optimize as necessary later
                     "Magnitude (Inner Aperture) Error": 0.05,  # TODO: Initial "good" value. Optimize as necessary later
                     "Magnitude (Outer Aperture)": 0.1,  # TODO: Initial "good" value. Optimize as necessary later
                     "Magnitude (Outer Aperture) Error": 0.05,  # TODO: Initial "good" value. Optimize as necessary later
                     "MSKY value": 0.1,  # TODO: Initial "good" value. Optimize as necessary later
                     "STDEV value": 0.05,  # TODO: Initial "good" value. Optimize as necessary later
                     "CI": 0.15,  # TODO: Initial "good" value. Optimize as necessary later
                     "Source Flagging": 5.0}  # TODO: Initial "good" value. Optimize as necessary later
    x_axis_units_dict = {"X Position": "pixels",
                         "Y Position": "pixels",
                         "On-Sky Separation" : "arcseconds",
                         "Flux (Inner Aperture)": "electrons/sec",
                         "Flux (Outer Aperture)": "electrons/sec",
                         "Magnitude (Inner Aperture)": "ABMAG",
                         "Magnitude (Inner Aperture) Error": "ABMAG",
                         "Magnitude (Outer Aperture)": "ABMAG",
                         "Magnitude (Outer Aperture) Error": "ABMAG",
                         "MSKY value": "ABMAG",
                         "STDEV value": "ABMAG",
                         "CI": "ABMAG"}
    if input_json_filename:
        json_data = diagnostic_utils.read_json_file(input_json_filename)
        slNames = []
        slNames.append(json_data['header']['reference catalog filename'])
        slNames.append(json_data['header']['comparison catalog filename'])
    else:
        # 0: optionally instantiate diag_obj
        if output_json_filename:
            diag_obj = diagnostic_utils.HapDiagnostic(log_level=log_level)
            diag_obj.instantiate_from_fitsfile(imgNames[1],
                                               data_source=__taskname__,
                                               description="matched ref and comp values.",
                                               timestamp=json_timestamp,
                                               time_since_epoch=json_time_since_epoch)
            # add reference and comparison catalog filenames as header elements
            diag_obj.add_update_info_item("header", "reference catalog filename", slNames[0])
            diag_obj.add_update_info_item("header", "comparison catalog filename", slNames[1])

        # 1: Read in sourcelists files into astropy table or 2-d array so that individual columns from each sourcelist can be easily accessed later in the code.
        refData, compData = slFiles2dataTables(slNames)
        log.info("Valid reference data columns:   {}".format(list(refData.keys())))
        log.info("Valid comparison data columns: {}".format(list(compData.keys())))
        log.info("\n")
        log.info("Data columns to be compared:")
        columns_to_compare = list(set(refData.keys()).intersection(set(compData.keys())))
        for listItem in sorted(columns_to_compare):
            log.info(listItem)
        log.info("\n")
        # 2: Run starmatch_hist to get list of matched sources common to both input sourcelists
        slLengths = [len(refData['X']), len(compData['X'])]
        matching_lines_ref, matching_lines_img = getMatchedLists(slNames, imgNames, slLengths, log_level)
        if len(matching_lines_ref) == 0 or len(matching_lines_img) == 0:
            log.critical("*** Comparisons cannot be computed. No matching sources were found. ***")
            return ("ERROR")
        # 2: Create masks to remove missing values or values not considered "good" according to user-specified good bit values
        # 2a: create mask that identifies lines any value from any column is missing
        missing_mask = mask_missing_values(refData, compData, matching_lines_ref, matching_lines_img, columns_to_compare)
        # 2b: create mask based on flag values
        matched_values = extractMatchedLines("FLAGS", refData, compData, matching_lines_ref, matching_lines_img)
        bitmask = make_flag_mask(matched_values, good_flag_sum, missing_mask)

        # 2c: add "cross match details" section to optionally created json file
        if output_json_filename:
            cross_match_details = collections.OrderedDict()
            cross_match_details["reference catalog filename"] = slNames[0]
            cross_match_details["comparison catalog filename"] = slNames[1]
            cross_match_details["reference catalog length"] = slLengths[0]
            cross_match_details["comparison catalog length"] = slLengths[1]
            cross_match_details["number of cross-matches"] = len(matching_lines_ref)
            cross_match_details["reference catalog crossmatch percentage"] = (float(len(matching_lines_ref))/float(slLengths[0]))*100.0
            cross_match_details["comparison catalog crossmatch percentage"] = (float(
                len(matching_lines_ref)) / float(slLengths[1])) * 100.0
            cross_match_details["reference catalog reference frame"] = fits.getval(imgNames[0], "radesys", ext=('sci', 1)).lower()
            cross_match_details["comparison catalog reference frame"] = fits.getval(imgNames[1], "radesys", ext=('sci', 1)).lower()
            descriptions_dict = {"reference catalog filename": "reference catalog filename used for crossmatch",
                                 "comparison catalog filename": "comparison catalog filename used for crossmatch",
                                 "reference catalog length": "total reference catalog length (matched and unmatched sources)",
                                 "comparison catalog length": "total comparison catalog length (matched and unmatched sources)",
                                 "number of cross-matches": "total number of sources found common to both reference and comparison catalogs",
                                 "reference catalog crossmatch percentage": "percentage of all reference sources found common to both reference and comparison catalogs",
                                 "comparison catalog crossmatch percentage": "percentage of all comparison sources found common to both reference and comparison catalogs",
                                 "reference catalog reference frame": "reference catalog reference astrometric frame",
                                 "comparison catalog reference frame": "comparison catalog reference astrometric frame"}
            units_dict = {}
            for key_item in descriptions_dict.keys():
                units_dict[key_item] = "unitless"
            diag_obj.add_data_item(cross_match_details, "Cross-match details", descriptions=descriptions_dict, units=units_dict)

    # 4: Compute and display statistics on X position differences for matched sources
    if input_json_filename:
        matched_values = json_data['data']['X']
        plate_scale = json_data['header']['plate_scale']
    else:
        # Get platescale
        plate_scale = wcsutil.HSTWCS(imgNames[0], ext=('sci', 1)).pscale
        matched_values = extractMatchedLines("X", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"X MATCHED VALUES",descriptions={'reference': "reference sourcelist crossmatched x values", 'comparison': "comparison sourcelist crossmatched x values"}, units={'reference': "pixels", 'comparison': "pixels"})
            diag_obj.add_update_info_item("header", "plate_scale", plate_scale)
    if matched_values.shape[1] > 0:
        formalTitle = "X Position"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list = pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)
        matchedXValues = matched_values.copy()

    # 5: Compute and display statistics on Y position differences for matched sources
    if input_json_filename:
        matched_values = json_data['data']['Y']
    else:
        matched_values = extractMatchedLines("Y", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')), "Y MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched y values", 'comparison': "comparison sourcelist crossmatched y values"}, units={'reference': "pixels", 'comparison': "pixels"})
    if matched_values.shape[1] > 0:
        formalTitle = "Y Position"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)
        matchedYValues = matched_values.copy()
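        # With both X and Y matched-value sets in hand, generate the X vs. Y difference vector plot via
        # makeVectorPlot() (skipped when plot generation is disabled) and, if writing to file, add it to the
        # combined pdf.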
        if plotGen != "none":
            pdf_file_name = makeVectorPlot(matchedXValues, matchedYValues, plate_scale, plotGen, plotfile_prefix, slNames)
            if plotGen == "file":
                pdf_file_list.append(pdf_file_name)
    if debugMode:
        check_match_quality(matchedXValues, matchedYValues)

    # 6: Compute and display statistics on RA/Dec position differences for matched sources
    # Get matched pairs of RA and Dec values
    if input_json_filename:
        matched_values_ra = json_data['data']['RA']
        matched_values_dec = json_data['data']['DEC']
    else:
        matched_values_ra = extractMatchedLines("RA", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values_ra.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values_ra[0], matched_values_ra[1]], names=('reference', 'comparison')), "RA MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched right ascension values", 'comparison': "comparison sourcelist crossmatched right ascension values"}, units={'reference': "degrees", 'comparison': "degrees"})
        matched_values_dec = extractMatchedLines("DEC", refData, compData, matching_lines_ref, matching_lines_img,bitmask=bitmask)
        if output_json_filename and matched_values_dec.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values_dec[0], matched_values_dec[1]], names=('reference', 'comparison')),"DEC MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched declination values", 'comparison': "comparison sourcelist crossmatched declination values"}, units={'reference': "degrees", 'comparison': "degrees"})
    if matched_values_ra.shape[1] > 0 and matched_values_ra.shape[1] == matched_values_dec.shape[1]:
        # get coordinate system type from fits headers
        if input_json_filename:
            ref_frame = json_data['header']['ref_frame']
            comp_frame = json_data['header']['comp_frame']
        else:
            ref_frame = fits.getval(imgNames[0], "radesys", ext=('sci', 1)).lower()
            comp_frame = fits.getval(imgNames[1], "radesys", ext=('sci', 1)).lower()
            if output_json_filename:  # Add 'ref_frame' and 'comp_frame' values to header so that SkyCoord() will execute OK
                diag_obj.add_update_info_item("header", "ref_frame", ref_frame)
                diag_obj.add_update_info_item("header", "comp_frame", comp_frame)

        # convert reference and comparison RA/Dec values into SkyCoord objects
        matched_values_ref = SkyCoord(matched_values_ra[0, :], matched_values_dec[0, :], frame=ref_frame, unit="deg")
        matched_values_comp = SkyCoord(matched_values_ra[1, :], matched_values_dec[1, :], frame=comp_frame, unit="deg")
        # convert to ICRS coord system
        if ref_frame != "icrs":
            matched_values_ref = matched_values_ref.icrs
        if comp_frame != "icrs":
            matched_values_comp = matched_values_comp.icrs
        formalTitle = "On-Sky Separation"
        matched_values = [matched_values_ref,matched_values_comp]
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)


    # 7: Compute and display statistics on flux differences for matched sources
    if input_json_filename:
        matched_values = json_data['data']['FLUX1']
    else:
        matched_values = extractMatchedLines("FLUX1", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"FLUX1 MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched flux (inner aperture) values", 'comparison': "comparison sourcelist crossmatched flux (inner aperture) values"}, units={'reference': "electrons/sec", 'comparison': "electrons/sec"})
    if matched_values.shape[1] > 0:
        formalTitle = "Flux (Inner Aperture)"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    if input_json_filename:
        matched_values = json_data['data']['FLUX2']
    else:
        matched_values = extractMatchedLines("FLUX2", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"FLUX2 MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched flux (outer aperture) values", 'comparison': "comparison sourcelist crossmatched flux (outer aperture) values"}, units={'reference': "electrons/sec", 'comparison': "electrons/sec"})
    if matched_values.shape[1] > 0:
        formalTitle = "Flux (Outer Aperture)"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    # 8: Compute and display statistics on magnitude differences for matched sources
    if input_json_filename:
        matched_values = json_data['data']['MAGNITUDE1']
    else:
        matched_values = extractMatchedLines("MAGNITUDE1", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"MAGNITUDE1 MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched magnitude (inner aperture) values", 'comparison': "comparison sourcelist crossmatched magnitude (inner aperture) values"}, units={'reference': "ABMAG", 'comparison': "ABMAG"})
    if matched_values.shape[1] > 0:
        formalTitle = "Magnitude (Inner Aperture)"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    if input_json_filename:
        matched_values = json_data['data']['MERR1']
    else:
        matched_values = extractMatchedLines("MERR1", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0: # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"MERR1 MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched magnitude error (inner aperture) values", 'comparison': "comparison sourcelist crossmatched magnitude error (inner aperture) values"}, units={'reference': "ABMAG", 'comparison': "ABMAG"})
    if matched_values.shape[1] > 0:
        formalTitle = "Magnitude (Inner Aperture) Error"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    if input_json_filename:
        matched_values = json_data['data']['MAGNITUDE2']
    else:
        matched_values = extractMatchedLines("MAGNITUDE2", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"MAGNITUDE2 MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched magnitude (outer aperture) values", 'comparison': "comparison sourcelist crossmatched magnitude (outer aperture) values"}, units={'reference': "ABMAG", 'comparison': "ABMAG"})
    if matched_values.shape[1] > 0:
        formalTitle = "Magnitude (Outer Aperture)"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    if input_json_filename:
        matched_values = json_data['data']['MERR2']
    else:
        matched_values = extractMatchedLines("MERR2", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"MERR2 MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched magnitude error (outer aperture) values", 'comparison': "comparison sourcelist crossmatched magnitude error (outer aperture) values"}, units={'reference': "ABMAG", 'comparison': "ABMAG"})
    if matched_values.shape[1] > 0:
        formalTitle = "Magnitude (Outer Aperture) Error"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    # 9: Compute and display statistics on differences in background sky values
    if input_json_filename:
        matched_values = json_data['data']['MSKY']
    else:
        matched_values = extractMatchedLines("MSKY", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"MSKY MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched msky values", 'comparison': "comparison sourcelist crossmatched msky values"}, units={'reference': "ABMAG", 'comparison': "ABMAG"})
    if matched_values.shape[1] > 0:
        formalTitle = "MSKY value"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    if input_json_filename:
        matched_values = json_data['data']['STDEV']
    else:
        matched_values = extractMatchedLines("STDEV", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"STDEV MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched stdev values", 'comparison': "comparison sourcelist crossmatched stdev values"}, units={'reference': "ABMAG", 'comparison': "ABMAG"})
    if matched_values.shape[1] > 0:
        formalTitle = "STDEV value"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    # 10: Compute and display statistics on differences in concentration index for matched sources
    if input_json_filename:
        matched_values = json_data['data']['CI']
    else:
        matched_values = extractMatchedLines("CI", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"CI MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched concentration index (mag2-mag1) values", 'comparison': "comparison sourcelist crossmatched concentration index (mag2-mag1) values"}, units={'reference': "ABMAG", 'comparison': "ABMAG"})
    if matched_values.shape[1] > 0:
        formalTitle = "CI"
        rt_status, pdf_files = computeLinearStats(matched_values, max_diff_dict[formalTitle],
                                                  x_axis_units_dict[formalTitle], plotGen, formalTitle,
                                                  plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += pdf_files
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

    # 11: Compute and display statistics on differences in flag populations for matched sources
    if input_json_filename:
        matched_values = json_data['data']['FLAGS']
    else:
        matched_values = extractMatchedLines("FLAGS", refData, compData, matching_lines_ref, matching_lines_img, bitmask=bitmask)
        if output_json_filename and matched_values.shape[1] > 0:  # Add matched values to diag_obj
            diag_obj.add_data_item(Table([matched_values[0], matched_values[1]], names=('reference', 'comparison')),"FLAGS MATCHED VALUES", descriptions={'reference': "reference sourcelist crossmatched flag values", 'comparison': "comparison sourcelist crossmatched flag values"}, units={'reference': "unitless", 'comparison': "unitless"})
    if matched_values.shape[1] > 0:
        formalTitle = "Source Flagging"
        rt_status, flag_pdf_list = computeFlagStats(matched_values, max_diff_dict[formalTitle], plotGen,
                                                    formalTitle, plotfile_prefix, slNames, verbose)
        if plotGen == "file":
            pdf_file_list += flag_pdf_list
        regressionTestResults[formalTitle] = rt_status
        colTitles.append(formalTitle)

        if debugMode:
            write_matched_catalogs(matchedXValues,matchedYValues,matched_values_ra,matched_values_dec,matched_values,slNames)

    log.info("\n")
    log_output_string_list = []

    lenList = []
    for item in colTitles:
        lenList.append(len(item))
    totalPaddedSize = max(lenList) + 3
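    # totalPaddedSize: width of the column-title field (longest title plus three characters) used to line up
    # the columns of the regression testing summary table below.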
    log_output_string_list.append("{}{}".format(" " * 35, "REGRESSION TESTING SUMMARY"))
    log_output_string_list.append("-" * (70 + totalPaddedSize))
    log_output_string_list.append("{}{}".format(" " * (totalPaddedSize + 46), "% within     % beyond"))
    log_output_string_list.append(
        "COLUMN{}STATUS   MEAN        MEDIAN       STD DEV     3\u03C3 of mean   3\u03C3 of mean".format(
            " " * (totalPaddedSize - 6)))
    overallStatus = "OK"
    for colTitle in colTitles:
        log_output_string_list.append(
            "%s%s%s" % (colTitle, "." * (totalPaddedSize - len(colTitle)), regressionTestResults[colTitle]))
        if not regressionTestResults[colTitle].startswith("OK"):
            overallStatus = "FAILURE"
    log_output_string_list.append("-" * (70 + totalPaddedSize))
    log_output_string_list.append("OVERALL TEST STATUS{}{}".format("." * (totalPaddedSize - 19), overallStatus))
    for log_line in log_output_string_list:
        log.info(log_line)

    # optionally write comparison statistics to json file
    if output_json_filename:
        for catalog_colname in regressionTestResults.keys():
            data_table_dict = collections.OrderedDict()
            parse_result_line = regressionTestResults[catalog_colname].split()
            if catalog_colname == "Source Flagging":
                key_list = ['Comparison Test Status', 'Percentage of all matched sources with flag value differences']
                descriptions_dict = {'Comparison Test Status': "overall test result",
                                     'Percentage of all matched sources with flag value differences': "Percentage of all cross-matched sources with flag value differences"}
                units_dict = {}
                for key_item in key_list:
                    units_dict[key_item] = "unitless"
            else:
                key_list = ['Comparison Test Status', '3x3 Sigma-Clipped Mean', '3x3 Sigma-Clipped Median',
                            '3x3 Sigma-Clipped Standard Deviation',
                            'Percent of all matched values within 3 sigma of mean',
                            'Percent of all matched values beyond 3 sigma of mean']
                descrip_list = ['Overall test status',
                                '3x3 sigma-clipped crossmatched comparison - reference mean difference value',
                                '3x3 sigma-clipped crossmatched comparison - reference median difference value',
                                '3x3 sigma-clipped crossmatched comparison - reference standard deviation value',
                                'Percentage of all crossmatched comparison - reference difference values within 3 sigma of mean',
                                'Percentage of all crossmatched comparison - reference difference values beyond 3 sigma of mean']
                descriptions_dict = {}
                for key_item, descrip_item in zip(key_list, descrip_list):
                    descriptions_dict[key_item] = descrip_item

                units = x_axis_units_dict[catalog_colname]
                units_list = ["unitless", units, units, units, "unitless", "unitless"]
                units_dict = {}
                for key_item, units_item in zip(key_list, units_list):
                    units_dict[key_item] = units_item

            for results_idx, results_coltitle in enumerate(key_list):
                data_table_dict[results_coltitle] = parse_result_line[results_idx]
            data_item_name = "{} RESULTS".format(catalog_colname.upper())
            diag_obj.add_data_item(data_table_dict, data_item_name, descriptions=descriptions_dict, units=units_dict)
            log.debug("Added '{}' data section to json output dictionary".format(data_item_name))

    if plotGen == "file":
        # generate final overall summary pdf page
        stat_summary_file_name = "stats_summary.pdf"
        if plotfile_prefix is not None:
            stat_summary_file_name = "{}_{}".format(plotfile_prefix, stat_summary_file_name)
        fig = plt.figure(figsize=(11, 8.5))
        # fig.text(0.5, 0.87, fullPlotTitle[:-1] + " statistics", transform=fig.transFigure, size=12, ha="center")
        stat_text_blob = ""
        for log_line in log_output_string_list:
            if log_line != "\n":
                stat_text_blob += log_line + "\n"
            else:
                stat_text_blob += "\n"

        stat_text_blob += "\n\nGenerated {}\n".format(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))
        stat_text_blob = wrap_long_filenames_on_stats_page(stat_text_blob, slNames)
        fig.text(0.5, 0.5, stat_text_blob, transform=fig.transFigure, size=10, ha="center", va="center",
                 multialignment="left", family="monospace")
        fig.savefig(stat_summary_file_name)
        plt.close()
        pdf_file_list.append(stat_summary_file_name)
        # combine all individual plot files into a single multiple-page pdf file
        final_plot_filename = "comparison_plots.pdf"
        if plotfile_prefix:
            final_plot_filename = "{}_{}".format(plotfile_prefix, final_plot_filename)

        pdf_merger(final_plot_filename, pdf_file_list)
        log.info("Sourcelist comparison plots saved to file {}.".format(final_plot_filename))

    # Optionally write out diagnostic_utils .json file
    if output_json_filename:
        diag_obj.write_json_file(output_json_filename, clobber=True)
    return (overallStatus)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-
def slFiles2dataTables(slNames):
    """Reads in data from sourcelists, returns some or all of the following data columns in a pair of properly
    formatted astropy.table objects:

    * X Position
    * Y Position
    * Right Ascension
    * Declination
    * Flux (Inner Aperture)
    * Flux (Outer Aperture)
    * Magnitude (Inner Aperture)
    * Magnitude (Outer Aperture)
    * Magnitude Error (Inner Aperture)
    * Magnitude Error (Outer Aperture)
    * MSKY
    * STDEV
    * CI (Concentration Index)
    * Flag Value

    .. note::
        'X position' and 'Y position' data columns will always be returned. However, not every sourcelist or catalog
        file is guaranteed to contain all of the remaining data columns in the above list.

    Parameters
    ----------
    slNames : list
        A list containing the reference sourcelist filename and the comparison sourcelist filename, in that order.

    Returns
    -------
    refData : astropy.table object
        data from the reference sourcelist

    compData : astropy.table object
        data from the comparison sourcelist
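
    Examples
    --------
    A minimal usage sketch; the catalog file names below are purely hypothetical placeholders:

    >>> ref_cat = "hst_12345_01_acs_wfc_f606w_abc101_point-cat.ecsv"   # hypothetical reference catalog
    >>> comp_cat = "hst_12345_01_acs_wfc_f606w_abc102_point-cat.ecsv"  # hypothetical comparison catalog
    >>> refData, compData = slFiles2dataTables([ref_cat, comp_cat])  # doctest: +SKIP
    >>> sorted(set(refData.keys()).intersection(compData.keys()))  # doctest: +SKIP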
    """
    if slNames[0].endswith(".ecsv"):
        refData_in = Table.read(slNames[0], format='ascii.ecsv')
    else:
        try:
            refData_in = Table.read(slNames[0], format='ascii.daophot')
        except Exception:
            refData_in = Table.read(slNames[0], format='ascii')
    if slNames[1].endswith(".ecsv"):
        compData_in = Table.read(slNames[1], format='ascii.ecsv')
    else:
        try:
            compData_in = Table.read(slNames[1], format='ascii.daophot')
        except Exception:
            compData_in = Table.read(slNames[1], format='ascii')
    titleSwapDict_dao1 = {"X": "X-Center", "Y": "Y-Center", "RA": "RA", "DEC": "DEC", "FLUX1": "n/a",
                          "FLUX2": "Flux(0.15)", "MAGNITUDE1": "MagAp(0.05)", "MAGNITUDE2": "MagAp(0.15)",
                          "MERR1": "MagErr(0.05)", "MERR2": "MagErr(0.15)", "MSKY": "MSky(0.15)",
                          "STDEV": "Stdev(0.15)", "FLAGS": "Flags", "ID": "ID", "CI": "CI"}
    titleSwapDict_dao2 = {"X": "X-Center", "Y": "Y-Center", "RA": "RA", "DEC": "DEC", "FLUX1": "n/a",
                          "FLUX2": "Flux(0.45)", "MAGNITUDE1": "MagAp(0.15)", "MAGNITUDE2": "MagAp(0.45)",
                          "MERR1": "MagErr(0.15)", "MERR2": "MagErr(0.45)", "MSKY": "MSky(0.45)",
                          "STDEV": "Stdev(0.45)", "FLAGS": "Flags", "ID": "ID", "CI": "CI"}
    titleSwapDict_dao3 = {"X": "X-Center", "Y": "Y-Center", "RA": "RA", "DEC": "DEC", "FLUX1": "n/a",
                          "FLUX2": "Flux(0.125)", "MAGNITUDE1": "MagAp(0.03)", "MAGNITUDE2": "MagAp(0.125)",
                          "MERR1": "MagErr(0.03)", "MERR2": "MagErr(0.125)", "MSKY": "MSky(0.125)",
                          "STDEV": "Stdev(0.125)", "FLAGS": "Flags", "ID": "ID", "CI": "CI"}
    titleSwapDict_point = {"X": "X-Center", "Y": "Y-Center", "RA": "RA", "DEC": "DEC", "FLUX1": "n/a",
                           "FLUX2": "FluxAp2", "MAGNITUDE1": "MagAp1", "MAGNITUDE2": "MagAp2", "MERR1": "MagErrAp1",
                           "MERR2": "MagErrAp2", "MSKY": "MSkyAp2", "STDEV": "StdevAp2", "FLAGS": "Flags", "ID": "ID",
                           "CI": "CI"}
    titleSwapDict_segment = {"X": "X-Centroid", "Y": "Y-Centroid", "RA": "RA", "DEC": "DEC", "FLUX1": "n/a",
                             "FLUX2": "FluxAp2", "MAGNITUDE1": "MagAp1", "MAGNITUDE2": "MagAp2", "MERR1": "MagErrAp1",
                             "MERR2": "MagErrAp2", "MSKY": "n/a", "STDEV": "n/a", "FLAGS": "Flags", "ID": "ID",
                             "CI": "CI"}
    titleSwapDict_daoTemp = {"X": "XCENTER", "Y": "YCENTER", "RA": "n/a", "DEC": "n/a", "FLUX1": "FLUX1",
                             "FLUX2": "FLUX2", "MAGNITUDE1": "MAG1", "MAGNITUDE2": "MAG2", "FLAGS": "n/a", "ID": "ID",
                             "MERR1": "MERR1", "MERR2": "MERR2", "MSKY": "MSKY", "STDEV": "STDEV"}
    titleSwapDict_sourceX = {"X": "X_IMAGE", "Y": "Y_IMAGE", "RA": "RA", "DEC": "DEC", "FLUX1": "FLUX_APER1",
                             "FLUX2": "FLUX_APER2", "MAGNITUDE1": "MAG_APER1", "MAGNITUDE2": "MAG_APER2",
                             "FLAGS": "FLAGS", "ID": "NUMBER"}
    titleSwapDict_cooNew = {"X": "col1", "Y": "col2", "RA": "n/a", "DEC": "n/a", "FLUX1": "n/a", "FLUX2": "n/a",
                            "MAGNITUDE1": "n/a", "MAGNITUDE2": "n/a", "FLAGS": "n/a", "ID": "col7"}
    # titleSwapDict_cooOld = {"X": "XCENTER", "Y": "YCENTER", "RA": "n/a", "DEC": "n/a", "FLUX1": "n/a", "FLUX2": "n/a",
    #                         "MAGNITUDE1": "n/a", "MAGNITUDE2": "n/a", "FLAGS": "n/a", "ID": "ID"}

    titleSwapDict_cooOld2 = {"X": "XCENTER", "Y": "YCENTER", "RA": "RA", "DEC": "DEC", "FLUX1": "FLUX_0.05",
                             "FLUX2": "FLUX_0.15", "MAGNITUDE1": "MAG_0.05", "MAGNITUDE2": "MAG_0.15", "FLAGS": "n/a",
                             "ID": "ID", "MERR1": "MERR_0.05", "MERR2": "MERR_0.15", "MSKY": "MSKY", "STDEV": "STDEV"}

    titleSwapDict_daorep = {"X": "X", "Y": "Y", "RA": "n/a", "DEC": "n/a", "FLUX1": "flux_0", "FLUX2": "flux_1",
                            "MAGNITUDE1": "mag_0", "MAGNITUDE2": "mag_1", "FLAGS": "n/a", "ID": "n/a"}
    ctr = 1
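    # Identify the flavor of each input sourcelist from its column names, then build a new output table using the
    # standardized column names expected throughout the rest of this script.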
    for dataTable in [refData_in, compData_in]:
        if "X-Center" in list(dataTable.keys()):
            if (("MagAp(0.05)" in list(dataTable.keys())) and (
                    "MagAp(0.15)" in list(dataTable.keys()))):  # ACS/WFC, WFC3/UVIS
                log.info("titleSwapDict_dao1")
                titleSwapDict = titleSwapDict_dao1
            elif (("MagAp(0.15)" in list(dataTable.keys())) and ("MagAp(0.45)" in list(dataTable.keys()))):  # WFC3/IR
                log.info("titleSwapDict_dao2")
                titleSwapDict = titleSwapDict_dao2
            elif (("MagAp(0.03)" in list(dataTable.keys())) and ("MagAp(0.125)" in list(dataTable.keys()))):  # ACS/HRC
                log.info("titleSwapDict_dao3")
                titleSwapDict = titleSwapDict_dao3
            elif "MagAp1" in list(dataTable.keys()):
                log.info("titleSwapDict_point")
                titleSwapDict = titleSwapDict_point
            else:
                sys.exit("ERROR: Unrecognized format. Exiting...")
        elif ("XCENTER" in list(dataTable.keys()) and "FLUX1" in list(dataTable.keys())):
            log.info("titleSwapDict_daoTemp")
            titleSwapDict = titleSwapDict_daoTemp
        elif "X_IMAGE" in list(dataTable.keys()):
            log.info("titleSwapDict_sourceX")
            titleSwapDict = titleSwapDict_sourceX
        elif "col1" in list(dataTable.keys()):
            log.info("titleSwapDict_cooNew")
            titleSwapDict = titleSwapDict_cooNew
        elif "XCENTER" in list(dataTable.keys()):
            log.info("titleSwapDict_cooOld2")
            titleSwapDict = titleSwapDict_cooOld2
        elif "X" in list(dataTable.keys()):
            log.info("titleSwapDict_daorep")
            titleSwapDict = titleSwapDict_daorep
        elif "X-Centroid" in list(dataTable.keys()) and "MagAp1" in list(dataTable.keys()):
            log.info("titleSwapDict_segment")
            titleSwapDict = titleSwapDict_segment
        else:
            sys.exit("ERROR: Unrecognized format. Exiting...")
        outTable = Table()
        for swapKey in list(titleSwapDict.keys()):
            if titleSwapDict[swapKey] != "n/a":
                try:
                    col2add = Table.Column(name=swapKey, data=dataTable[titleSwapDict[swapKey]])
                except TypeError:
                    col2add = Table.MaskedColumn(name=swapKey, data=dataTable[titleSwapDict[swapKey]])
                outTable.add_column(col2add)
        if ctr == 1:
            refData = outTable
        if ctr == 2:
            compData = outTable
        ctr += 1

    return (refData, compData)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-

def pdf_merger(output_path, input_paths):
    """Merges multiple pdf files into a single multi-page pdf file
    
    Parameters
    ----------
    output_path : str
        name of output multipage pdf file
        
    input_paths : list
        list of pdf files to combine
    
    Returns
    -------
    nothing.
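
    Examples
    --------
    A minimal sketch; the pdf file names are hypothetical. Note that the individual input files are removed
    once the merged file has been written:

    >>> pdf_merger("combined_plots.pdf", ["plot_a.pdf", "plot_b.pdf"])  # doctest: +SKIP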
    """
    merger = PdfFileMerger()

    for path in input_paths:
        merger.append(path)

    with open(output_path, 'wb') as fileobj:
        merger.write(fileobj)

    for path in input_paths:
        os.remove(path)


# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-

def wrap_long_filenames_on_stats_page(stat_text_blob, slNames):
    """wrap long (length > 80 characters) catalog filenames to the next line so the filenames don't run off
    the edge of the page

    Parameters
    ----------
    stat_text_blob : str
        text block to which the properly formatted catalog filenames will be appended.

    slNames : list
        two-element list containing the reference catalog filename followed by the comparison catalog
        filename

    Returns
    -------
    stat_text_blob : str
        updated version of input argument stat_text_blob
    """
    if len(slNames[1]) > 80:
        out_compcat_filename = "{}\n{}{}".format(slNames[1][:80], " " * 23, slNames[1][80:])
    else:
        out_compcat_filename = slNames[1]
    if len(slNames[0]) > 80:
        out_refcat_filename = "{}\n{}{}".format(slNames[0][:80], " " * 23, slNames[0][80:])
    else:
        out_refcat_filename = slNames[0]
    stat_text_blob += "Comparison Sourcelist: {}\nReference Sourcelist:  {}".format(out_compcat_filename,
                                                                                    out_refcat_filename)
    return stat_text_blob

# -~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-

def write_matched_catalogs(x,y,ra,dec,flags,slnames):
    """Writes only matched elements of the input catalogs for columns X, Y, RA, Dec, and Flags ONLY. These catalogs will
    are to be used as inputs for compare_sourcelist_flagging.py. The output file names are based on the input file
    names, with the string 'matched_files_only' inserted after all the proposal/visit/instrument/detector and
    (HAP only) ippss information. For example, the output file produced from HAP point catalog
    'hst_11665_06_wfc3_uvis_f555w_ib4606_point-cat.ecsv' is
    'hst_11665_06_wfc3_uvis_f555w_ib4606_matched_sources_only_point-cat.ecsv', and the output file produced from
    the corrected HLA Classic daophot catalog 'hst_11665_06_wfc3_uvis_f555w_daophot_corrected.txt' is
    'hst_11665_06_wfc3_uvis_f555w_matched_sources_only_daophot_corrected.txt'.

    Parameters
    ----------
    x : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Column 1: matched x reference values.
        Column 2: The corresponding matched comparison values

    y : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Column 1: matched y reference values.
        Column 2: The corresponding matched comparison values

    ra : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Column 1: matched RA reference values.
        Column 2: The corresponding matched comparison values

    dec : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Column 1: matched Dec reference values.
        Column 2: The corresponding matched comparison values

    flags : numpy.ndarray
        A 2 x len(refLines) sized numpy array. Column 1: matched flag reference values.
        Column 2: The corresponding matched comparison values

    slnames : list
        list of input sourcelist filenames. it is assumed here and throughout the code that the first file listed is
        the reference, and the second is the comparison

    Returns
    -------
    Nothing.
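
    Examples
    --------
    A minimal sketch using hypothetical 2 x N matched-value arrays and hypothetical catalog names:

    >>> import numpy as np
    >>> vals = np.zeros((2, 5))
    >>> flag_vals = np.zeros((2, 5), dtype=int)
    >>> cats = ["hst_ref_point-cat.ecsv", "hst_comp_point-cat.ecsv"]  # hypothetical
    >>> write_matched_catalogs(vals, vals, vals, vals, flag_vals, cats)  # doctest: +SKIP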
    """
    for ctr in range(0,2):
        sl_name = slnames[ctr]
        for file_ending in ["daophot.txt", "daophot_corrected.txt", "point-cat.ecsv", "sexphot.txt",
                            "sexphot_corrected.txt", "segment-cat.ecsv"]:
            if sl_name.endswith(file_ending):
                output_filename = sl_name.replace(file_ending,"matched_sources_only_{}".format(file_ending))
        flagscolname = "Flags"
        if (sl_name.endswith("daophot.txt") or sl_name.endswith("daophot_corrected.txt") or sl_name.endswith("point-cat.ecsv")):
            xcolname = "X-Center"
            ycolname = "Y-Center"
        elif sl_name.endswith("segment-cat.ecsv"):
            xcolname = "X-Centroid"
            ycolname = "Y-Centroid"
        else:
            xcolname = "X_IMAGE"
            ycolname = "Y_IMAGE"
            flagscolname = "FLAGS"
        output_table = Table([x[ctr,:],y[ctr,:],ra[ctr,:],dec[ctr,:],flags[ctr,:]],names=(xcolname,ycolname,"RA","DEC",flagscolname))
        if output_filename.endswith(".ecsv"):
            output_format = "ascii.ecsv"
        if output_filename.endswith(".txt"):
            output_format = "ascii.csv"
        output_table.write(output_filename, format = output_format)
        log.info("Wrote matched sources only catalog {}".format(output_filename))

# =======================================================================================================================
if __name__ == "__main__":
    PARSER = argparse.ArgumentParser(description='Compare Sourcelists')
    # input arguments
    PARSER.add_argument('-sl', '--sourcelistNames', nargs=2,
                        help='A space-separated pair of sourcelists to compare. The first sourcelist is assumed to be the reference sourcelist that the second is being compared to. Required unless an input json file is specified via the -ji option.')
    # optional input arguments
    PARSER.add_argument('-d', '--debugMode', required=False, choices=["True", "False"], default="False",
                        help="Turn on debug mode? Default value is False.")
    PARSER.add_argument('-g', '--goodFlagSum', required=False, default=255, type=int, help="a sum of individual bit values (i.e. 0 + 1 + 2 + 4 = 7) that will be considered 'good'. If any of the flag bits for a given set of matched sources contain bits not specified here, the pair will be ignored by the comparisons. See XXX for flag bit definitions. (NOTE: The default value of 255 will be interpreted as all bits are good, so no sources will be excluded)")
    PARSER.add_argument('-i', '--imageNames', required=False, nargs=2,
                        help='A space-separated list of the fits images that were used to generate the input sourcelists. The first image corresponds to the first listed sourcelist, and so on. These will be used to improve the sourcelist alignment and matching.')
    PARSER.add_argument('-ji', '--input_json_filename', required=False, default=None, help="Name of an input diagnostic_utils json file to use to duplicate a previous run. If not specified, the script runs with new inputs, and sourcelist names and image names must be specified.")
    PARSER.add_argument('-jo', '--output_json_filename', required=False, default=None, help="Name of the output diagnostic_utils json file that all matched column values from the input sourcelists will be written to so that this compare_sourcelist.py run can be duplicated in the future. If not specified, no json file will be created.")
    PARSER.add_argument('-p', '--plotGen', required=False, choices=["screen", "file", "none"], default="none",
                        help='Generate Plots? "screen" displays plots on-screen. "file" saves them to a .pdf file, and "none" skips all plot generation.')
    PARSER.add_argument('-s', '--plotfile_prefix_string', required=False, default="",
                        help="text string that will be prepended to the names of the plot files generated ***REQUIRES the -p option to be set to 'file'***")
    PARSER.add_argument('-v', '--verbose', required=False, choices=["True", "False"], default="True",
                        help='Display verbose output? Default value is "True".')
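    # Example invocation (all file names are hypothetical):
    #   python compare_sourcelists.py -sl ref_point-cat.ecsv comp_point-cat.ecsv \
    #       -i ref_image.fits comp_image.fits -p file -s test_run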
    ARGS = PARSER.parse_args()
    print(ARGS)

    ARGS.verbose = ARGS.verbose == "True"

    ARGS.debugMode = ARGS.debugMode == "True"

    if ARGS.goodFlagSum:
        good_flag_bits = deconstruct_flag(ARGS.goodFlagSum)
        good_flag_bits[0] = 1
    else:
        good_flag_bits = np.ones(9, dtype=int)
    runStatus = comparesourcelists(slNames=ARGS.sourcelistNames,
                                   imgNames=ARGS.imageNames,
                                   good_flag_sum=ARGS.goodFlagSum,
                                   plotGen=ARGS.plotGen,
                                   verbose=ARGS.verbose,
                                   log_level=logutil.logging.INFO,
                                   debugMode=ARGS.debugMode,
                                   plotfile_prefix=ARGS.plotfile_prefix_string,
                                   input_json_filename=ARGS.input_json_filename,
                                   output_json_filename=ARGS.output_json_filename)


# TODO: fix PEP 8 violations