__author__ = 'byt3smith'
#
# Purpose: Tools for gathering IP addresses, domain names, URLs, etc.
#

from time import sleep
from os import chdir, path
from xlrd import open_workbook, sheet
import re
import sys
import urllib.request, urllib.error, urllib.parse
from . import pdfConverter
import unicodedata
from colorama import Fore, Back, Style, init

init(autoreset=True)  # Initialize colorama with autoreset enabled (NOTE(review): per colorama docs this resets colors after each print -- confirm)

def connect(url):
    """Fetch *url* and return its body as a list of byte lines.

    url: any URL string accepted by urllib.request.urlopen.

    Returns the result of readlines() on the response (a list of bytes,
    line endings included).

    On any failure to open or read the URL an error is written to stderr
    and the process exits via sys.exit(0).  The zero exit status is kept
    for backward compatibility with existing callers.
    """
    try:
        # The context manager closes the response even if reading fails.
        with urllib.request.urlopen(url) as response:
            return response.readlines()
    except Exception as err:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must propagate instead of being turned into a silent exit.
        print('[!] Unable to retrieve %s: %s' % (url, err), file=sys.stderr)
        sys.exit(0)


def regex(ioc_type):
    """Return the compiled bytes regular expression for an IOC type.

    ioc_type: one of "ip", "domain", "md5", "sha1", "sha256", "email",
              "URL" or "yara" (keys are case-sensitive).

    The "ip" and "domain" patterns accept both plain dots and defanged
    "[.]" separators.  All patterns are bytes patterns, so they must be
    applied to bytes input.

    If ioc_type is not one of the known keys, an error message is printed
    and the process exits via sys.exit(0).
    """
    # Raw bytes literals keep the regex escapes (\d, \[, \W, ...) intact
    # without relying on Python passing unknown escapes through.
    ioc_patts = {
        "ip": rb"((?:(?:[12]\d?\d?|[1-9]\d|[1-9])(?:\[\.\]|\.)){3}(?:[12]\d?\d?|[\d+]{1,2}))",
        "domain": rb"([A-Za-z0-9]+(?:[\-|\.][A-Za-z0-9]+)*(?:\[\.\]|\.)(?:com|net|edu|ru|org|de|uk|jp|br|pl|info|fr|it|cn|in|su|pw|biz|co|eu|nl|kr|me))",
        "md5": rb"\W([A-Fa-f0-9]{32})(?:\W|$)",
        "sha1": rb"\W([A-Fa-f0-9]{40})(?:\W|$)",
        "sha256": rb"\W([A-Fa-f0-9]{64})(?:\W|$)",
        "email": rb"[a-zA-Z0-9_]+(?:\.[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?!([a-zA-Z0-9]*\.[a-zA-Z0-9]*\.[a-zA-Z0-9]*\.))(?:[A-Za-z0-9](?:[a-zA-Z0-9-]*[A-Za-z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?",
        "URL": rb"((?:http|ftp|https)\:\/\/(?:[\w+?\.\w+])+[a-zA-Z0-9\~\!\@\#\$\%\^\&\*\(\)_\-\=\+\\/\?\.\:\;]+)",
        "yara": rb"(rule\s[\w\W]{,30}\{[\w\W\s]*\})",
    }

    try:
        return re.compile(ioc_patts[ioc_type])
    except KeyError:
        # An unknown type fails the dictionary lookup (KeyError); it never
        # reaches re.compile, so re.error is not the exception to catch.
        print('[!] Invalid type specified.')
        sys.exit(0)


def gather(url, rex):
    """Download *url* and return the unique matches of *rex* found in it.

    url: feed location, fetched with connect().
    rex: compiled bytes pattern (e.g. from regex()) applied to each line.

    Lines starting with "/" or "#", and blank lines, are skipped.
    Returns the matches in first-seen order with duplicates removed.
    """
    lines = connect(url)
    sleep(2)  # fixed pause after each fetch -- presumably to throttle feed requests; confirm

    seen = set()  # O(1) duplicate check; ioc_list keeps first-seen order
    ioc_list = []
    for line in lines:
        if line.startswith((b"/", b"#", b"\n")):
            continue
        for ioc in rex.findall(line):
            if ioc not in seen:
                seen.add(ioc)
                ioc_list.append(ioc)

    return ioc_list


def add2file(filename, ioc_list):
    """Write each IOC in *ioc_list* to *filename*, one per line.

    filename: path of the output file.
    ioc_list: list of bytes IOCs; each is decoded as UTF-8 before writing.

    Nothing is written when ioc_list is empty.  If the first entry matches
    the "ip" pattern the file is opened with 'w+' (overwritten); otherwise
    it is opened with 'a+' (appended to).
    """
    if not ioc_list:
        return

    first_is_ip = regex('ip').match(ioc_list[0]) is not None
    mode = 'w+' if first_is_ip else 'a+'

    # The context manager closes the file even if a decode or write fails.
    with open(filename, mode) as f:
        for ioc in ioc_list:
            f.write(ioc.decode("utf-8") + '\n')


def _unique(items):
    """Return *items* as a list with duplicates removed, keeping first-seen order."""
    return list(dict.fromkeys(items))


def _read_spreadsheet(filename):
    """Return the text cells of the workbook's first sheet joined into one ASCII byte string."""
    worksheet = open_workbook(filename).sheet_by_index(0)
    chunks = []
    for col_index in range(worksheet.ncols):
        for cell in worksheet.col(col_index):
            value = cell.value
            # Non-string values cannot be normalized/encoded below, and
            # strings shorter than two characters are ignored.
            if not isinstance(value, str) or len(value) < 2:
                continue
            chunks.append(unicodedata.normalize('NFKD', value).encode('ascii', 'ignore'))
    return b', '.join(chunks)


def _count_label(count):
    """Return *count* as text coloured green when positive and red otherwise."""
    colour = Fore.GREEN if count > 0 else Fore.RED
    return colour + '%d' % count + Fore.RESET


def extract(filename):
    """Extract IOCs from a file and write them to data/intel/<name>_ioc.

    filename: path of the input file.  Names ending in "pdf" are converted
              with pdfConverter, names ending in "xls"/"xlsx" are read with
              xlrd (first sheet only), anything else is read as text.  The
              suffix check is case-insensitive.

    IPv4 addresses, domain names, MD5/SHA1/SHA256 hashes and YARA rules are
    collected (duplicates removed, defanged "[.]" turned back into "."),
    written to the output file in sections separated by blank lines, and a
    per-type count is printed.

    Side effect: the process working directory is changed to 'data/intel/'
    (relative to the current directory) and is not restored.
    """
    ### Determine filetype to define how IOCs are processed
    lowered = filename.lower()
    if lowered.endswith('pdf'):
        data = bytes(pdfConverter.convert_pdf_to_txt(filename), 'utf-8')
    elif lowered.endswith(('xls', 'xlsx')):
        data = _read_spreadsheet(filename)
    else:
        with open(filename, "r") as handle:
            data = bytes(handle.read(), 'utf-8')

    ### Collect unique IOCs per type; ip/domain matches are re-fanged first
    ### so that "1[.]2[.]3[.]4" and "1.2.3.4" count as the same indicator
    ip_list = _unique(m.replace(b'[.]', b'.') for m in regex('ip').findall(data))
    domain_list = _unique(m.replace(b'[.]', b'.') for m in regex('domain').findall(data))
    md5_list = _unique(regex('md5').findall(data))
    sha1_list = _unique(regex('sha1').findall(data))
    sha256_list = _unique(regex('sha256').findall(data))
    yara_list = _unique(regex('yara').findall(data))

    ### Create _ioc file
    chdir('data/intel/')
    base = path.basename(filename)
    base_noext = path.splitext(base)[0]

    banner = '''
+-------------------+
|       RESULTS     |
+-------------------+'''
    print(banner)

    # Section order defines the layout of the output file.
    sections = [
        ('IPv4 Addresses', ip_list),
        ('Domain Names', domain_list),
        ('MD5 Hashes', md5_list),
        ('YARA Rules', yara_list),
        ('SHA1 Hashes', sha1_list),
        ('SHA256 Hashes', sha256_list),
    ]

    ### Write IOCs to files
    with open(base_noext + '_ioc', 'w+') as out:
        for label, iocs in sections:
            for ioc in iocs:
                out.write(ioc.decode("utf-8") + '\n')
            out.write("\n")
            print(label + ' [' + _count_label(len(iocs)) + ']')

    print(Fore.GREEN + "\n[+]" + Fore.RESET + " IOCs written to %s" % base_noext + '_ioc!')


def update_progress(progress):
    """Redraw a 20-character text progress bar on stdout.

    progress: completed fraction (0 to 1) as an int or float.  Any other
              type is shown as 0% with an error note, negative values are
              shown as 0% with "Halt!", and values of .999 or more are
              shown as 100% with " Complete!".

    The line begins with a carriage return so repeated calls overwrite the
    previous bar; stdout is flushed after every write.
    """
    bar_width = 20  # Modify this value to change the length of the progress bar
    note = ""

    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress, note = 0, "error: progress var must be float\r\n"
    if progress < 0:
        progress, note = 0, Fore.RED + "Halt!\r\n"
    if progress >= .999:
        progress, note = 1, Fore.GREEN + " Complete!\r\n"

    filled = int(round(bar_width * progress))
    bar = "#" * filled + "-" * (bar_width - filled)
    percent = round(progress * 100)

    sys.stdout.write("\r[*] Progress: [{0}] {1}% {2}".format(bar, percent, note))
    sys.stdout.flush()