from natsort import natsorted
from collections import OrderedDict
import pkg_resources
import logging
from typing import Dict

from epic.config import logging_settings
from epic.utils.find_readlength import (find_readlength,
                                        get_closest_readlength)

__author__ = "Endre Bakken Stovner https://github.com/endrebak/"
__license__ = "MIT"


def get_genome_size_file(genome):
    # type: (str) -> str

    genome_names = pkg_resources.resource_listdir("epic", "scripts/chromsizes")
    name_dict = {n.lower().replace(".chromsizes", ""): n for n in genome_names}

    # No try/except here, because get_egs would already have failed if genome
    # did not exist
    genome_exact = name_dict[genome.lower()]

    return pkg_resources.resource_filename(
        "epic", "scripts/chromsizes/{}".format(genome_exact))


def create_genome_size_dict(genome):
    # type: (str) -> Dict[str,int]
    """Creates genome size dict from string containing data."""

    size_file = get_genome_size_file(genome)
    size_lines = open(size_file).readlines()

    size_dict = {}
    for line in size_lines:
        genome, length = line.split()
        size_dict[genome] = int(length)

    return size_dict


def create_genome_size_dict_custom_genome(chromsizes):
    # type: (str) -> OrderedDict[str, int]

    chromosome_lengths = [l.split() for l in open(chromsizes).readlines()]

    od = OrderedDict()          # type: OrderedDict[str, int]

    for c, l in natsorted(chromosome_lengths):
        od[c] = int(l)

    return od


def get_effective_genome_length(genome, read_length):
    # type: (str, int) -> float

    genome_names = pkg_resources.resource_listdir("epic",
                                                  "scripts/effective_sizes")
    name_dict = {n.split("_")[0]: "".join(n.split("_")[:-1])
                 for n in genome_names}

    try:
        genome_exact = name_dict[genome.lower()]
        egf = pkg_resources.resource_string( # type: ignore
            "epic", "scripts/effective_sizes/{}_{}.txt".format(
                genome_exact, read_length)).split()[-1].decode()
    except KeyError:
        genome_list = "\n".join(list(name_dict.keys()))
        logging.error(
            "Genome " + genome +
            " not found.\n These are the available genomes: " + genome_list +
            "\nIf yours is not there, please request it at github.com/endrebak/epic .")

    genome_length = sum(create_genome_size_dict(genome).values())

    logging.info("Using an effective genome fraction of {}.".format(egf))

    assert float(
        egf) < 1, "Something wrong happened, effective genome fraction over 1!"

    egs = float(egf) * genome_length

    return egs