"""
Miscellaneous tools for the pipeline. Some may eventually be refactored into their own modules.
"""
import re
import itertools
import argparse
import pysam
import pandas as pd
import os
import hashlib

from . import procOps
from .pipeline import ProcException, Procline
from distutils.version import StrictVersion


class HashableNamespace(argparse.Namespace):
    """
    Adds a __hash__ function to argparse's Namespace.
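
    The hash is derived from the string form of every attribute value and reduced
    modulo 10**12, so two namespaces populated with the same values in the same
    order hash identically.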
    """
    def __hash__(self):
        m = hashlib.sha256()
        for val in self.__dict__.values():
            m.update(str(val).encode('utf-8'))
        return int(m.hexdigest(), 16) % 10 ** 12


class PipelineNamespace(object):
    """
    A hashable namespace that tracks whether each member is significant and thus should be hashed.
    Used to maintain information on the pipeline state but allow users to change insignificant features without forcing
    the pipeline to rerun expensive modules.
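
    Example (illustrative; the attribute names here are hypothetical):
        ns = PipelineNamespace()
        ns.set('genome', 'hg38')                 # significant: contributes to the hash
        ns.set('num_cpu', 8, significant=False)  # insignificant: ignored by the hash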
    """
    def __init__(self):
        self.significant = {}

    def set(self, name, val, significant=True):
        setattr(self, name, val)
        self.significant[name] = significant

    def __hash__(self):
        # hash the values of significant members only (skip the bookkeeping dict itself)
        vals = tuple(self.__dict__[name] for name in self.__dict__ if name != 'significant' and self.significant[name])
        m = hashlib.sha256()
        for val in vals:
            m.update(str(val).encode('utf-8'))
        return int(m.hexdigest(), 16) % 10 ** 12


def convert_gtf_gp(gp_target, gtf_target):
    """converts a GTF to genePred"""
    cmd = ['gtfToGenePred', '-genePredExt', gtf_target.path, '/dev/stdout']
    with gp_target.open('w') as outf:
        procOps.run_proc(cmd, stdout=outf)


def convert_gp_gtf(gtf_target, gp_target, source='CAT'):
    """Converts a genePred to GTF"""
    cmd = ['genePredToGtf', 'file', gp_target.path, '-utr', '-honorCdsStat', '-source={}'.format(source), '/dev/stdout']
    with gtf_target.open('w') as outf:
        procOps.run_proc(cmd, stdout=outf)


def samtools_version():
    """checks the version of samtools installed"""
    try:
        r = procOps.call_proc_lines(['samtools', '--version'])
        if StrictVersion(r[0].split()[1].split('-')[0]) < '1.3':
            raise Exception('samtools version is not >= 1.3.0')
    except ProcException:
        raise Exception('samtools is not installed')


def is_bam(path):
    """Checks if a path is a BAMfile"""
    try:
        pysam.Samfile(path)
    except IOError:
        raise RuntimeError('Path {} does not exist'.format(path))
    except ValueError:
        return False
    return True


def pairwise(iterable):
    """s -> (s0, s1), (s2, s3), (s4, s5), ..."""
    a = iter(iterable)
    return zip(a, a)


def pairwise_adjacent(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)


def sort_gff(input_file, output_file):
    """Sorts a GFF format file by column 1 (chromosome) then column 4(start integer)"""
    cmd = [['sort', '-n', '-k4,4', input_file], ['sort', '-s', '-n', '-k5,5'], ['sort', '-s', '-k1,1']]
    procOps.run_proc(cmd, stdout=output_file)


def parse_gtf_attr_line(attr_line):
    """parse a GTF attributes line"""
    if len(attr_line) == 0:
        return {}
    attr_line = [x.split(' ') for x in re.split('; +', attr_line.replace('"', ''))]
    attr_line[-1][-1] = attr_line[-1][-1].rstrip().replace(';', '')
    return dict(attr_line)


def parse_gff_attr_line(attr_line):
    """parse a GFF attributes line"""
    if len(attr_line) == 0:
        return {}
    attr_line = [x.split('=') for x in re.split('; *', attr_line.replace('"', ''))]
    attr_line[-1][-1] = attr_line[-1][-1].rstrip().replace(';', '')
    return dict(attr_line)


def slice_df(df, ix):
    """
    Slices a DataFrame by an index value, returning an empty DataFrame if the value is missing. If the slice yields a
    single row (and hence a Series), it is converted back to a one-row DataFrame.
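
    Example (illustrative): slice_df(df, 'chr1') returns the rows indexed by 'chr1',
    or an empty DataFrame if 'chr1' is not present in the index.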
    """
    try:
        r = df.xs(ix)
        if isinstance(r, pd.core.series.Series):
            return pd.DataFrame([r])
        else:
            return r
    except KeyError:
        return pd.DataFrame()


def running_in_container():
    """
    Is CAT trying to run tools inside containers?
    """
    return os.environ.get("CAT_BINARY_MODE") != "local"


def is_exec(program):
    """checks if a program is in the global path and executable"""
    if running_in_container():
        # We assume containerized versions don't need to check if the
        # tools are installed--they definitely are, and calling docker
        # just to run "which" can be surprisingly expensive. But we do
        # check for the presence of Docker or Singularity, since that should take
        # only a few ms.
        binary_mode = os.environ.get('CAT_BINARY_MODE')
        cmd = ['which', binary_mode]
        pl = Procline(cmd, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null')
        try:
            pl.wait()
            return True
        except ProcException:
            raise Exception("{0} not found. Either install {0}, or install CAT's dependencies and use --binary-mode local.".format(binary_mode))
    else:
        cmd = ['which', program]
        try:
            return procOps.call_proc_lines(cmd)[0].endswith(program)
        except ProcException:
            return False