###############################################################
## script to calcualte the maximum number of duplicates to keep at single position
## usage: python cal_max_duplicates_to_keep.py genome_size library_size
## By Yanxiao Zhang <yanxiazh@umich.edu>
## Timestamp: 5/26/2016
#######################

import sys
from scipy.stats import binom_test

def binomial(n, p):
    #calculate the expected maximum number of replicated reads at a single position
    x = 1
    pvalue = 0
    while (binom_test(x,n,p) > 0.00001):
        x = x + 1
    if x >1:
        x= x - 1
    return x 


def cal_max_dup(argv):
    ''' The main function for calculating the max dup'''
    help = 'usage: python cal_max_duplicates_to_keep.py genome_size library_size'
    if len(argv) != 3:
        print(help)
        exit(1)
    genome_size = float(argv[1])
    read_total = float(argv[2])
    expected_dup = binomial(read_total, 1.0/genome_size)
    print("The expected maximum duplicate is {0}".format(expected_dup))
    return expected_dup

if __name__ == "__main__":
    cal_max_dup(sys.argv)