#!/usr/bin/env python

import sys
import os
import json
import argparse
import zmq
import time

import colorama

import core.graph.memory
import core.engine
import core.token
import core.encoder

def print_line():
    """Print a dimmed horizontal rule made of 80 dashes."""
    rule = "-" * 80
    print("".join((colorama.Style.DIM, rule, colorama.Style.RESET_ALL)))

def print_item(item_type, message):
    """Print a status line of the form ``[item_type] message``.

    The square brackets are emitted with the dim style; the tag and the
    message are emitted right after a style reset.

    :param item_type: short tag shown between the brackets.
    :param message: text shown after the bracketed tag.
    """
    dim = colorama.Style.DIM
    reset = colorama.Style.RESET_ALL
    template = "{dim}[{reset}{tag}{dim}] {reset}{text}"
    print(template.format(dim=dim, reset=reset, tag=item_type, text=message))

def generator_from_element(element):
    """Wrap a single task in a generator that yields it exactly once.

    :param element: the object to yield (returned as-is, not copied).
    """
    for item in (element,):
        yield item

def generator_from_file(filename, element_type):
    """Yield one seed task per usable line of a text file.

    The first whitespace-separated token of each line becomes the task id;
    anything after it on the line is ignored. Blank lines and lines whose
    first token starts with ``#`` are skipped.

    :param filename: path of the text file to read.
    :param element_type: value stored under the ``'type'`` key of each task.
    :returns: generator of ``{'id': <token>, 'type': element_type}`` dicts.
    """
    # Plain "r" instead of "rU": the "U" flag is rejected by Python 3.11+,
    # and Python 3 text mode already translates newlines.
    with open(filename, "r") as infile:
        for line in infile:
            fields = line.split()
            if not fields or fields[0].startswith("#"):
                continue
            yield {'id': fields[0], 'type': element_type}

def generator_from_json(filename):
    """Yield the tasks stored in a file holding one JSON document per line.

    Blank lines are ignored. A line that is not valid JSON is reported on
    stdout and skipped; it does not stop the iteration.

    :param filename: path of the file to read.
    :returns: generator of decoded JSON values, in file order.
    """
    # Plain "r" instead of "rU": the "U" flag is rejected by Python 3.11+,
    # and Python 3 text mode already translates newlines.
    with open(filename, "r") as json_file:
        for line in json_file:
            text = line.strip()
            if not text:
                continue
            # Only the decoding is guarded; the yield stays outside the try
            # so that an exception raised by the consumer is never reported
            # as a JSON error.
            try:
                task = json.loads(text)
            except ValueError as e:
                print("Invalid JSON task, skipped.")
                print("{}".format(e))
                continue
            yield task

def generator_from_query(graph, query):
    """Yield the graph vertices matching a JSON-encoded query.

    Every vertex returned by ``graph.query_vertices`` gets its ``'depth'``
    entry set to 0 before being yielded.

    :param graph: object exposing ``query_vertices(query)``.
    :param query: the query as a JSON string.
    :returns: generator of vertices; yields nothing when ``query`` cannot
        be decoded (a warning is printed instead).
    """
    try:
        json_query = json.loads(query)
    except (TypeError, ValueError):
        print(colorama.Fore.YELLOW + "Invalid graph vertex query!" + colorama.Fore.RESET)
        # Without a decoded query there is nothing to look up; stop here
        # instead of falling through to an unbound ``json_query``.
        return

    for vertex in graph.query_vertices(json_query):
        vertex['depth'] = 0
        yield vertex

def generator_from_zmq_pull(context, host):
    """Yield tasks received on a ZMQ PULL socket bound to ``host``.

    The generator never blocks and never terminates: each step performs one
    non-blocking receive and yields either the decoded task or ``None``
    when no message was available. A message that is not valid JSON is
    reported on stdout and also results in ``None``, so one malformed
    message cannot stop the stream.

    :param context: ZMQ context used to create the socket.
    :param host: endpoint to bind to; one trailing ``/`` is removed.
    """
    socket = context.socket(zmq.PULL)
    # TODO: Configure socket with clean properties to avoid message overload.
    if host.endswith('/'):
        host = host[:-1]
    print_item("+", "Binding ZMQ pull socket : " + colorama.Fore.CYAN + "{0}".format(host) + colorama.Style.RESET_ALL)
    socket.bind(host)

    while True:
        try:
            message = socket.recv(flags=zmq.NOBLOCK)
        except zmq.Again:
            message = None

        task = None
        if message is not None:
            try:
                task = json.loads(message)
            except ValueError as e:
                print("Invalid JSON task, skipped.")
                print("{}".format(e))

        # NOTE: We have to make the generator non blocking, hence the None
        # yielded whenever there is nothing usable to hand out.
        yield task
 
if __name__ == "__main__":
    # Command-line entry point. Steps, in order:
    #   1. parse the arguments and load the configuration file + profile,
    #   2. create the graph backend (MongoDB or in-memory),
    #   3. apply command-line overrides to configuration['parameters'],
    #   4. build the list of seed generators and the optional ZMQ sockets,
    #   5. run the engine and, unless disabled, dump the graph as JSON.

    colorama.init()

    print("Miner Script (version 3.8)")

    parser = argparse.ArgumentParser()

    parser.add_argument('--domain', help='Mine from a domain.')
    parser.add_argument('--domains', help='Mine from a list of domains in a file.')
    parser.add_argument('--url', help='Mine from a URL.')
    parser.add_argument('--urls', help='Mine from a list of URLs in a file.')
    parser.add_argument('--ip', help='Mine from an IP.')
    parser.add_argument('--ips', help='Mine from a list of IPs in a file.')
    parser.add_argument('--asn', help='Mine from an ASN.')
    parser.add_argument('--asns', help='Mine from a list of ASNs in a file.')
    parser.add_argument('--email', help='Mine from an email address.')
    parser.add_argument('--emails', help='Mine from a list of emails in a file.')
    parser.add_argument('--hash', help='Mine from a hash.')
    parser.add_argument('--hashes', help='Mine from a list of hashes in a file.')
    parser.add_argument('--regex', help="Mine from a regex.")
    parser.add_argument('--regexes', help="Mine from a list of regexes in a file.")
    parser.add_argument('--query', help="Mine from graph vertices matching the query")
    parser.add_argument('--json', help="Load custom tasks from a JSON file.")
    parser.add_argument('--pull', help="Pull entries to mine from a ZMQ stream.")
    parser.add_argument('--push', help='Push mined results to a ZMQ stream.')

    parser.add_argument('--config', default='conf.json', help='Select a configuration file.')
    parser.add_argument('--profile', default='default', help='Select a mining profile.')
    parser.add_argument('--token', default=None, help='Set the mining token.')
    parser.add_argument('--ttl', default=None, help="Set the mining token TTL (in seconds).")
    parser.add_argument('--title', help='Set the dataset title.')
    parser.add_argument('--pipeline', help="Set the list of active plugins.")
    parser.add_argument('--depth', help='Set the mining maximum depth.')
    # type=int so the engine gets an integer whether the value comes from
    # the default or from the command line.
    parser.add_argument('--workers', default=4, type=int, help="Set the number of worker threads.")
    parser.add_argument('--qlimit', default=None, help="Set the queue size soft limit.")
    parser.add_argument('--output', default='result.json', help='Set the output JSON filename.')
    parser.add_argument('--mongo', default=None, help='Use MongoDB as a graph database.')
    parser.add_argument('--reset', action='store_true', help="Reset graph.")
    parser.add_argument('--no-output', action='store_true', help="No JSON output.")
    parser.add_argument('--stats', action='store_true', help="Compute performance metrics.")

    args = parser.parse_args()

    # --------------------------------------------------------------------

    configuration = dict()
    seeders = list()

    with open(args.config, "r") as conf_file:
        try:
            configuration = json.load(conf_file)
        except ValueError as e:
            print("[Error] Your configuration file seems corrupt!")
            print(colorama.Fore.RED + str(e) + colorama.Style.RESET_ALL)
            # Non-zero status so callers can detect the failure.
            sys.exit(1)

    if args.profile is not None:
        # Profiles live in a "profiles" directory next to the configuration
        # file; their top-level keys override those of the configuration.
        profile_path = "{0}/profiles/{1}.json".format(os.path.dirname(os.path.realpath(args.config)), args.profile)
        with open(profile_path, "r") as profile_file:
            configuration.update(json.load(profile_file))

    # ------------------------------------------------------------------------

    print_line()

    print_item("+", "Loading miner configuration ...")

    if args.mongo is not None:
        # Only core.graph.memory is imported at the top of the file, so the
        # mongo backend module is loaded here, when it is actually needed.
        import core.graph.mongo

        # --mongo is split on ':' into host, optional port, optional db.
        mongo_parts = args.mongo.split(':')
        host = mongo_parts[0]
        port = int(mongo_parts[1]) if len(mongo_parts) >= 2 else 27017
        db = mongo_parts[2] if len(mongo_parts) >= 3 else "miner"

        print_item("+", "Connecting to graph mongo database ...")
        print("    - Host: " + colorama.Fore.CYAN + "{0}".format(host) + colorama.Style.RESET_ALL)
        print("    - Port: " + colorama.Fore.CYAN + "{0}".format(port) + colorama.Style.RESET_ALL)
        print("    - Database: " + colorama.Fore.CYAN + "{0}".format(db) + colorama.Style.RESET_ALL)
        graph = core.graph.mongo.Graph(host=host, port=port, db=db)

        if args.reset:
            print_item("+", "Resetting graph ...")
            graph.clear()
    else:
        print_item("+", "Creating graph in local memory ...")
        graph = core.graph.memory.Graph()

    # ------------------------------------------------------------------------

    parameters = configuration['parameters']

    if args.depth is not None:
        # "infinite" is passed through verbatim; anything else must be an int.
        parameters['depth'] = "infinite" if args.depth == "infinite" else int(args.depth)
    else:
        parameters.setdefault('depth', 0)

    # The --stats flag always overrides the configured value (False when
    # the flag is absent).
    parameters['stats'] = args.stats

    if args.pipeline is not None:
        parameters['pipeline'] = args.pipeline.split(',')

    # (task type, single value option, file-of-values option), in the order
    # the seeders are registered.
    seed_options = (
        ('domain', args.domain, args.domains),
        ('url', args.url, args.urls),
        ('ip', args.ip, args.ips),
        ('asn', args.asn, args.asns),
        ('email', args.email, args.emails),
        ('hash', args.hash, args.hashes),
        ('regex', args.regex, args.regexes),
    )
    for element_type, single_value, list_file in seed_options:
        if single_value is not None:
            seeders.append(generator_from_element({'type': element_type, 'id': single_value}))
        if list_file is not None:
            seeders.append(generator_from_file(list_file, element_type))

    if args.json is not None:
        seeders.append(generator_from_json(args.json))

    if args.query is not None:
        seeders.append(generator_from_query(graph, args.query))

    output_socket = None
    if args.pull is not None or args.push is not None:
        context = zmq.Context()

        if args.pull is not None:
            seeders.append(generator_from_zmq_pull(context, args.pull))

        if args.push is not None:
            output_socket = context.socket(zmq.PUSH)
            host = args.push
            if host.endswith('/'):
                host = host[:-1]
            print_item("+", "Connecting ZMQ push socket : " + colorama.Fore.CYAN + "{0}".format(host) + colorama.Style.RESET_ALL)
            output_socket.connect(host)

    # ------------ Token Configuration ---------------

    # Fill in whatever the configuration does not provide.
    tokens = parameters.setdefault('tokens', dict())
    if 'uuid' not in tokens:
        tokens['uuid'] = str(core.token.Token())
    tokens.setdefault('ttl', None)
    tokens.setdefault('rules', dict())

    if args.token is not None:
        try:
            tokens['uuid'] = str(core.token.Token(args.token))
        except Exception:
            # The exception type raised by Token on bad input is not visible
            # from this file, hence the broad clause.
            print(colorama.Fore.YELLOW + "Wrong token format! Exiting" + colorama.Fore.RESET)
            sys.exit(1)

    if args.ttl is not None:
        try:
            tokens['ttl'] = float(args.ttl)
        except ValueError:
            print(colorama.Fore.YELLOW + "Wrong TTL format! Exiting" + colorama.Fore.RESET)
            sys.exit(1)

    print_item("+", "Token configuration")
    print("    - {c}default{r} {sep} uuid:{c}{uuid}{r}, ttl:{c}{ttl}{r}".format(
        c=colorama.Fore.CYAN,
        r=colorama.Fore.RESET,
        sep=colorama.Style.DIM + "->" + colorama.Style.RESET_ALL,
        uuid=tokens['uuid'] + colorama.Fore.RESET,
        ttl=tokens['ttl']
    ))

    for vertex_type, token_data in tokens['rules'].items():
        print("    - {c}{vtype}{r} {sep} {vrule}".format(
            c=colorama.Fore.CYAN,
            r=colorama.Fore.RESET,
            vtype=vertex_type,
            sep=colorama.Style.DIM + "->" + colorama.Style.RESET_ALL,
            vrule=", ".join(
                ["{}:{}{}{}".format(k, colorama.Fore.CYAN, v, colorama.Fore.RESET) for k, v in token_data.items()]
            )
        ))

    # ------------------------------------------------------------------------

    # A missing or non-numeric --qlimit means "no limit".
    try:
        qlimit = int(args.qlimit)
    except (TypeError, ValueError):
        qlimit = None

    engine = core.engine.Engine(
        configuration,
        graph,
        workers=args.workers,
        seeders=seeders,
        output_socket=output_socket,
        qlimit=qlimit
    )

    engine.prepare_workers()
    print_item("+", "Starting engine ...")
    print_line()
    engine.start()
    print_line()
    engine.stop_workers()

    # ------------------------------------------------------------------------

    if not args.no_output:

        data = engine.graph.extract()

        # Count the tokens per depth; tokens without a readable depth are
        # counted under the None key.
        depths = dict()
        for token in data['tokens']:
            try:
                depth = token['depth']
            except Exception:
                depth = None
            depths[depth] = depths.get(depth, 0) + 1
        data['properties']['bfs_signature'] = depths
        print("BFS Signature: {0}".format(depths))

        # Tokens are only kept in the output when stats were requested.
        if not configuration['parameters']['stats']:
            del data['tokens']

        if args.title is not None:
            data['meta']['title'] = args.title

        print("Writing graph to '{0}' ...".format(args.output))
        with open(args.output, "w") as outfile:
            json.dump(data, outfile, indent=4, cls=core.encoder.Encoder)

    print("Done.")