#!/usr/bin/env python3

import argparse
import copy
from datetime import datetime
import json
import os
import subprocess
from subprocess import Popen
import sys
import threading

from strict_rfc3339 import rfc3339_to_timestamp
import attr
from jsonschema import Draft4Validator, validators, FormatChecker
import singer

from terminaltables import AsciiTable



WORKING_DIR_NAME = 'singer-check-tap-data'


def extend_with_default(validator_class):
    validate_properties = validator_class.VALIDATORS["properties"]

    def set_defaults(validator, properties, instance, schema):
        for error in validate_properties(validator, properties, instance, schema):
            yield error

        for prop, subschema in properties.items():
            if "format" in subschema:
                if subschema['format'] == 'date-time' and instance.get(prop) is not None:
                    try:
                        datetime.utcfromtimestamp(rfc3339_to_timestamp(instance[prop]))
                    except Exception:
                        raise Exception('Error parsing property {}, value {}'
                                        .format(prop, instance[prop]))

    return validators.extend(validator_class, {"properties": set_defaults})


@attr.s # pylint: disable=too-few-public-methods
class StreamAcc(object):

    name = attr.ib()
    num_records = attr.ib(default=0)
    num_schemas = attr.ib(default=0)
    latest_schema = attr.ib(default=None, repr=False)


@attr.s
class OutputSummary(object):

    streams = attr.ib(default=attr.Factory(dict))
    num_states = attr.ib(default=0)

    def __init__(self):
        self.latest_state = None

    def ensure_stream(self, stream_name):
        if stream_name not in self.streams: # pylint: disable=unsupported-membership-test
            self.streams[stream_name] = StreamAcc(stream_name) # pylint: disable=unsubscriptable-object
        return self.streams[stream_name] # pylint: disable=unsubscriptable-object

    def add(self, message):
        if isinstance(message, singer.RecordMessage):
            stream = self.ensure_stream(message.stream)
            if stream.latest_schema:
                validator_fn = extend_with_default(Draft4Validator)
                validator = validator_fn(
                    stream.latest_schema, format_checker=FormatChecker())
                validator.validate(copy.deepcopy(message.record))
            else:
                print('I saw a record for stream {} before the schema'.format(
                    message.stream))
                exit(1)
            stream.num_records += 1

        elif isinstance(message, singer.SchemaMessage):
            stream = self.ensure_stream(message.stream)
            stream.num_schemas += 1
            stream.latest_schema = message.schema

        elif isinstance(message, singer.StateMessage):
            self.latest_state = message.value
            self.num_states += 1

    def num_records(self):
        return sum([stream.num_records for stream in self.streams.values()]) # pylint: disable=no-member

    def num_schemas(self):
        return sum([stream.num_schemas for stream in self.streams.values()]) # pylint: disable=no-member

    def num_messages(self):
        return self.num_records() + self.num_schemas() + self.num_states


class StdoutReader(threading.Thread):

    def __init__(self, process):
        self.process = process
        self.summary = None
        super().__init__()

    def run(self):
        self.summary = summarize_output(self.process.stdout)

    def finish_reading_logs(self):
        """Joins the thread with a timeout.

        Intended to be called on the parent thread.
        """
        print('Joining on thread {}'.format(self.name))
        self.join(timeout=5)
        if self.is_alive():
            print(
                'Thread {} did not finish within timeout'.format(self.name))
        else:
            print('Thread {} finished'.format(self.name))


def summarize_output(output):
    summary = OutputSummary()
    for line in output:
        summary.add(singer.parse_message(line))
    return summary


def print_summary(summary):

    print('The output is valid.')
    print('It contained {} messages for {} streams.'.format(
        summary.num_messages(), len(summary.streams)))
    print('')
    print('{:7} schema messages'.format(summary.num_schemas()))
    print('{:7} record messages'.format(summary.num_records()))
    print('{:7} state messages'.format(summary.num_states))
    print('')
    print('Details by stream:')
    headers = [['stream', 'records', 'schemas']]
    rows = [[s.name, s.num_records, s.num_schemas]
            for s in summary.streams.values()]
    data = headers + rows

    table = AsciiTable(data)
    print(table.table)


def run_and_summarize(tap, config, state=None, debug=False):
    cmd = [tap, '--config', config]
    if state:
        cmd += ['--state', state]
    print('Running command {}'.format(' '.join(cmd)))

    stderr = None if debug else subprocess.DEVNULL
    tap = Popen(cmd,
                stdout=subprocess.PIPE,
                stderr=stderr,
                bufsize=1,
                universal_newlines=True)
    summarizer = StdoutReader(tap)
    summarizer.start()
    returncode = tap.wait()
    if returncode != 0:
        print('ERROR: tap exited with status {}'.format(returncode))
        exit(1)

    return summarizer.summary


def check_with_no_state(args):
    return run_and_summarize(args.tap, args.config, debug=args.debug)


def check_with_state(args, state):
    state_path = os.path.join(WORKING_DIR_NAME, 'state.json')
    with open(state_path, mode='w') as state_file:
        json.dump(state, state_file)
    return run_and_summarize(
        args.tap, args.config, state=state_path, debug=args.debug)


def main():

    parser = argparse.ArgumentParser(
        description='''Verifies that a Tap conforms to the Singer
        specification.''',
        epilog='''If a --tap argument is provided, this program will
        exit zero if the Tap exits zero and produces valid output, or
        non-zero if the tap exits non-zero or if the output it
        produces is invalid. If no --tap is provided, exits zero if
        the data on stdin is valid, non-zero otherwise.''')

    parser.add_argument(
        '-t',
        '--tap',
        help='''Tap program to execute. If provided, I'll run this tap
        and check its output. Otherwise, I'll read from stdin.''')

    parser.add_argument(
        '-c',
        '--config',
        help='Config file for tap. Only used of --tap is also specified.')

    parser.add_argument(
        '-d',
        '--debug',
        action='store_true',

        help='''Turn on debugging. Show log output from tap. By default
        logging output from tap is suppressed.''')

    args = parser.parse_args()

    try:
        os.mkdir(WORKING_DIR_NAME)
    except FileExistsError:
        pass

    if args.tap:
        if not args.config:
            print('If you provide --taps you must also provide --config')
            exit(1)

    if args.tap:
        print('Checking tap {} with config {}'.format(args.tap, args.config))
        summary = check_with_no_state(args)
    else:
        print('Checking stdin for valid Singer-formatted data')
        summary = summarize_output(sys.stdin)

    print_summary(summary)

    if args.tap:
        if summary.latest_state:
            print('')
            print('')
            print('Now re-running tap with state produced by previous run')
            summary = check_with_state(args, summary.latest_state)
            print_summary(summary)

if __name__ == '__main__':
    main()