#!/usr/bin/env python
import sys
import numpy as np
import os
from scipy.stats import mannwhitneyu,ranksums,ttest_ind,ks_2samp

def compare_by_position(bed1,bed2,xmfa):
    pos_dict = {}

    for i,bed in enumerate([bed1,bed2]):
        pos_dict[i] = {}
        with open(bed,'r') as fi:
                for line in fi:
                #2  1892198 1892199 TCMMTMTTMMM 0.5 -   16
                    csome,start,end,motif,perc_meth,strand,num_reads,probabilities = tuple(line.split('\t'))
                    pos_dict[i][(csome,start,end,strand)] = ((perc_meth,num_reads),np.asarray([float(p) for p in probabilities.strip().split(',')]))

    for pos in pos_dict[0]:
        if pos in pos_dict[1]:
            try:
                u,pval = mannwhitneyu(pos_dict[0][pos][1],pos_dict[0][pos][1],alternative='two-sided')
            except ValueError:
                u,pval = 'none','identical'
            u2,pval2 = ranksums(pos_dict[0][pos][1],pos_dict[0][pos][1])
            try:
                t,pval3 = ttest_ind(pos_dict[0][pos][1],pos_dict[0][pos][1])
            except:
                t,pval3 = 'none','missing df'
            d,pval4 = ks_2samp(pos_dict[0][pos][1],pos_dict[0][pos][1])
            if pval4 < 0.9:
                print pos, pos_dict[0][pos][0], pos_dict[1][pos][0], pval, pval2, pval3, pval4



def main():
    #parse command line options
    from argparse import ArgumentParser
    parser = ArgumentParser(description='Compare methylation between two genomes by probabilities of methylation for aligned positions')
    parser.add_argument('--bed1',type=str,required=True,help='bed file 1 with verbose output from make_bed.py')
    parser.add_argument('--bed2',type=str,required=True,help='bed file 2 with verbose output from make_bed.py')
    parser.add_argument('-g','--genome_alignment',type=str,required=False,help='an xmfa file from mauve (if absent, alignments assumed to be to the same reference genome)')
    parser.add_argument('-v','--version',action='store_true',required=False,help='print version')
    args = parser.parse_args()

    if args.version:
        print 'mCallerNP 0.1'
        sys.exit(0)

    assert os.path.isfile(args.bed1), 'file not found at '+args.bed1
    assert os.path.isfile(args.bed2), 'file not found at '+args.bed2

    compare_by_position(args.bed1,args.bed2,args.genome_alignment)

if __name__ == "__main__":
    main()