#! /usr/bin/env python
import os
import subprocess
import sys
import string
import optparse
import fasta
import math
import tempfile
import shutil


def format(seq, N=60):
	nseg = int(math.ceil(len(seq)/(N+0.0)))
	return '\n'.join([seq[i * N:(i + 1) * N] for i in range(nseg)])
# write into fasta format file

parser = optparse.OptionParser(version="%prog ")

parser.add_option(
	"-i", "--input", dest="input_fasta", action="store",
	help="name of input file in fasta format")

parser.add_option(
	"-r", "--rnammer", dest="rnammer", action="store",
	help="path to rnammer executable")

parser.add_option(
	"-o", "--output", dest="out_prefix_fname", action="store",
	help="name of output file")

parser.add_option(
	"-k", "--kingdoms", dest="kingdoms", action="store",
	default="arc,bac,euk", help="kingdom used")
#        default="arc,bac", help="kingdom used")

parser.add_option(
	"-m", "--moltypes", dest="moltypes", action="store",
	default="lsu,ssu,tsu", help="molecule type detected")
#        default="lsu,ssu,tsu,lsurnammer,ssurnammer,tsurnammer", help="molecule type detected")

parser.add_option(
	"-e", "--Evalue", dest="evalue", action="store", type="float",
	default=0.01, help="evalue cut-off for hmmsearch")

parser.add_option(
	"-p", "--pThreads", dest="p", action="store", type="int",
	default=1, help="number of threads for hmmsearch")

parser.add_option(
	"-T", "--tmp", dest="temp_folder", action="store", default=None, type=str,
	help="temporary folder")


try:
	(options, args) = parser.parse_args()
except:
	parser.print_help()
	sys.exit(1)

# or options.hmm_path is None
if options.input_fasta is None:
	parser.print_help()
	sys.exit(1)

rnammer = options.rnammer
if rnammer is None:
	print "no executable for rnammer given"
	parser.print_help()
	sys.exit(1)

out_prefix_fname = options.out_prefix_fname
if out_prefix_fname is None:
	parser.print_help()
	sys.exit(1)

# os.environ["HMMERDB"] += ":"+os.path.abspath(options.hmm_path)
# print os.environ["HMMERDB"]
out_prefix_fname = os.path.abspath(out_prefix_fname)
out_dir = os.path.dirname(out_prefix_fname)
fname = os.path.abspath(options.input_fasta)

tr = string.maketrans("gatcryswkmbdhvnGATCRYSWKMBDHVN", "ctagyrswmkvhdbnCTAGYRSWMKVHDBN")


def rev_record(record):
	return ">" + record.header + "|rev\n" + format(record.sequence[::-1].translate(tr))

records = [rec for rec in fasta.fasta_itr(fname)]
headers = [[rec.header, len(rec.sequence)] for rec in records]

in_fasta = out_prefix_fname + '.fa'
ff = open(in_fasta, 'w')
for (i, rec) in enumerate(records):
	ff.write('>s' + str(i) + '\n' + format(rec.sequence) + '\n')
	# ff.write('>s' + str(i) + '|rev\n' + format(rec.sequence[::-1].translate(tr)) + '\n')
ff.close()
# sys.exit(1)
# a temporary fasta file, use s(int) to easy the parsing


def parse_hmmsearch(kingdom, moltype, src):
	# function to parse hmmsearch output
	resu = []
	data = open(src).readlines()
	inds = [-1] + [i for (i, x) in enumerate(data[2]) if x == " "]
	inds = [(inds[j] + 1, inds[j + 1]) for j in range(len(inds) - 1)]
	data = [line for line in data if line[0] != "#"]
	for line in data:
		if not len(line.strip()):
			continue
		[read, acc, tlen, qname, qaccr, qlen, seq_evalue, seq_score, seq_bias,
			seq_num, seq_of, dom_cEvalue, dom_iEvalue, dom_score, dom_bias,
			hmm_start, hmm_end, dom_start, dom_end, env_start, env_end] = line.split()[:21]
		#            [line[x[0]:x[1]].strip() for x in inds[:21]]
		if string.atof(dom_iEvalue) < options.evalue:
			#            resu.append("\t".join([read, acc, tlen, qname, qaccr, \
			#                    qlen, seq_evalue, seq_score, seq_bias, seq_num, seq_of, \
			#                    dom_cEvalue, dom_iEvalue, dom_score, dom_bias, hmm_start, \
			#                    hmm_end, dom_start, dom_end, env_start, env_end]))
			resu.append("\t".join([qname, dom_start, dom_end, read, dom_iEvalue]))

	return resu

dict_rRNA = {
	'arc_lsu': '23S_rRNA', 'arc_ssu': '16S_rRNA', 'arc_tsu': '5S_rRNA',
	'bac_lsu': '23S_rRNA', 'bac_ssu': '16S_rRNA', 'bac_tsu': '5S_rRNA',
	'euk_lsu': '28S_rRNA', 'euk_ssu': '18S_rRNA', 'euk_tsu': '8S_rRNA'}

temp_dir_path = options.temp_folder
if temp_dir_path is None:
	temp_dir_path = tempfile.mkdtemp(suffix="rnammer", dir=None)

hmm_resu = []
for kingdom in options.kingdoms.split(','):
	for moltype in options.moltypes.split(','):
		# print kingdom, moltype
		# hmm_out_fname = "%s.%s_%s.out" % (out_prefix_fname, kingdom, moltype)
		# dom_out_fname = "%s.%s_%s.dom" % (out_prefix_fname, kingdom, moltype)
		out_fasta = out_prefix_fname + "." + dict_rRNA['{0}_{1}'.format(kingdom, moltype)] + ".fna"
		out_gff = out_prefix_fname + "." + dict_rRNA['{0}_{1}'.format(kingdom, moltype)] + ".gff"
		# cmd = "{0} -S {1} -m {2} -gff {3} -f {4} - < {5}".format(rnammer, kingdom, moltype, out_gff, out_fasta, in_fasta)
		cmd = "{0} -S {1} -T {6} -m {2} -gff {3} -f {4} - < {5}".format(rnammer, kingdom, moltype, out_gff, out_fasta, in_fasta, temp_dir_path)
		# subprocess.call(cmd)
		# cmd = "{0} {1} {2} {3} {4} {5} {6} {7}".format(
		# str(bash_rnammer), str(rnammer), str(kingdom), str(moltype), str(out_gff), str(out_fasta), str(in_fasta), str(temp_dir_path))
		# print cmd
		hmm_proc = subprocess.call(cmd, shell=True, bufsize=-1, cwd=temp_dir_path)
		# cmd = 'hmmsearch --cpu %d -o %s --domtblout %s -E %g %s/%s_%s.hmm %s' % \
		#    (options.p, hmm_out_fname, dom_out_fname,
		#        options.evalue, os.path.abspath(options.hmm_path), kingdom, moltype, out_fname + '.fa')
		# hmm_resu += parse_hmmsearch(os.popen(cmd))
		# print cmd
		# os.system(cmd)
		# hmm_resu += parse_hmmsearch(kingdom, moltype, dom_out_fname)
		# os.remove(hmm_out_fname)
		# os.remove(dom_out_fname)

if options.temp_folder is None:
	shutil.rmtree(temp_dir_path)
	os.remove(in_fasta)