import sys
from argparse import Namespace

import click
import tableprint
import json
import logging
import os
import magic

from piicatcher.catalog.file import FileStore
from piicatcher.explorer.metadata import NamedObject
from piicatcher.piitypes import PiiTypes, PiiTypeEncoder
from piicatcher.scanner import NERScanner, RegexScanner
from piicatcher.tokenizer import Tokenizer


@click.command('files')
@click.pass_context
@click.option("--path", type=click.Path(), help="Path to file or directory")
# NOTE(review): click.File() opens its argument for reading by default, yet
# this option names an output destination -- confirm the intended mode.
@click.option("--output", type=click.File(), default=None,
              help="DEPRECATED. Please use --catalog-file")
@click.option("--output-format", type=click.Choice(["ascii_table", "json", "db"]),
              help="DEPRECATED. Please use --catalog-format")
def cli(ctx, path, output, output_format):
    # Entry point of the ``files`` sub-command: forwards the path and the
    # catalog settings stored on the click context to FileExplorer.dispatch.
    #
    # Documented with comments instead of a docstring on purpose: click would
    # show a docstring as the command's --help text.
    #
    # ctx           -- click context; ctx.obj['catalog'] must be a mutable mapping.
    # path          -- file or directory to scan.
    # output        -- deprecated; when given it overrides catalog['file'].
    # output_format -- deprecated; when given it overrides catalog['format'].
    catalog = ctx.obj['catalog']

    if not (output_format is None and output is None):
        logging.warning("--output-format and --output is deprecated. "
                        "Please use --catalog-format and --catalog-file")

    # The deprecated flags win over whatever the catalog already holds; the
    # shared catalog mapping is updated in place.
    for key, override in (('format', output_format), ('file', output)):
        if override is not None:
            catalog[key] = override

    FileExplorer.dispatch(Namespace(path=path, catalog=catalog))


class File(NamedObject):
    """A file path paired with its MIME type, scannable for PII.

    Detected PII types are accumulated in ``self._pii``, which this class
    does not define itself (presumably provided by ``NamedObject`` --
    verify against the base class).
    """

    def __init__(self, name, mime_type):
        """Create a file entry.

        :param name: path of the file; it is opened as-is by :meth:`scan`.
        :param mime_type: MIME type string, e.g. ``text/plain``.
        """
        # The two empty tuples are passed straight to NamedObject.__init__;
        # their meaning is defined by the base class.
        super(File, self).__init__(name, (), ())
        self._mime_type = mime_type

    def get_mime_type(self):
        """Return the MIME type given at construction."""
        return self._mime_type

    def scan(self, context):
        """Scan the file's contents and record every PII type found.

        Files whose MIME type does not start with ``text/`` are not read;
        they are tagged with ``PiiTypes.UNSUPPORTED`` instead.

        :param context: mapping with the keys ``'tokenizer'``, ``'regex'``
            and ``'ner'``. The NER scanner runs over the whole text, the
            regex scanner over each non-stop token from the tokenizer.
        :raises KeyError: if one of the three keys is missing from
            ``context`` (checked even for unsupported files).
        """
        tokenizer = context['tokenizer']
        regex = context['regex']
        ner = context['ner']

        if not self._mime_type.startswith('text/'):
            self._pii.add(PiiTypes.UNSUPPORTED)
            return

        # Only the read needs the handle; close it before the scanning starts.
        # NOTE(review): text mode with the platform default encoding -- a file
        # that cannot be decoded raises here; confirm that is acceptable.
        with open(self.get_name(), 'r') as f:
            data = f.read()

        for pii in ner.scan(data):
            self._pii.add(pii)

        for token in tokenizer.tokenize(data):
            if token.is_stop:
                continue
            for pii in regex.scan(token.text):
                self._pii.add(pii)


class FileExplorer:
    """Collect the files under a path, scan each for PII and report results."""

    @classmethod
    def dispatch(cls, ns):
        """Build an explorer from ``ns``, run the scan and emit the results.

        :param ns: object with a ``path`` attribute and a ``catalog`` mapping
            that contains at least the key ``'format'``.

        ``"ascii_table"`` prints a table through ``tableprint``; ``"json"``
        hands the explorer to ``FileStore.save_schemas``. Any other format
        value produces no output from this method.
        """
        logging.debug("File Dispatch entered")
        explorer = cls(ns)
        explorer.scan()

        output_format = ns.catalog['format']
        if output_format == "ascii_table":
            headers = ["Path", "Mime/Type", "pii"]
            tableprint.table(explorer.get_tabular(), headers)
        elif output_format == "json":
            FileStore.save_schemas(explorer)

    def __init__(self, ns):
        """Remember the path and catalog from ``ns``; nothing is scanned yet."""
        self._path = ns.path
        self._files = []
        self.catalog = ns.catalog

    def scan(self):
        """Register the file(s) found at the configured path and scan them.

        If the path is a regular file, only that file is registered;
        otherwise the path is walked with ``os.walk`` and every file found
        is registered. Each registered file is then scanned with one shared
        tokenizer / regex scanner / NER scanner context.

        Files are appended to the existing list, so calling this method
        again registers them a second time.
        """
        logging.debug("Scanning %s", self._path)
        if os.path.isfile(self._path):
            # The single-file case logs the absolute path but keeps the path
            # exactly as given for the File entry.
            self._add_file(self._path, os.path.abspath(self._path))
        else:
            for root, _subdirs, filenames in os.walk(self._path):
                for filename in filenames:
                    file_path = os.path.join(root, filename)
                    self._add_file(file_path, file_path)

        context = {'tokenizer': Tokenizer(), 'regex': RegexScanner(), 'ner': NERScanner()}
        for f in self._files:
            f.scan(context)

    def _add_file(self, file_path, logged_path):
        """Detect the MIME type of ``file_path`` and register a File for it."""
        mime_type = magic.from_file(file_path, mime=True)
        logging.debug('\t- full path: %s, mime_type: %s', logged_path, mime_type)
        self._files.append(File(file_path, mime_type))

    def get_tabular(self):
        """Return one ``[path, mime type, JSON-encoded PII list]`` row per file."""
        return [
            [f.get_name(), f.get_mime_type(), json.dumps(list(f.get_pii_types()),
                                                         cls=PiiTypeEncoder)]
            for f in self._files
        ]

    def get_dict(self):
        """Return ``{'files': [...]}`` with one dict per registered file.

        Each entry has the keys ``'path'``, ``'Mime/Type'`` and ``'pii'``
        (the file's PII types as a list).
        """
        return {
            'files': [
                {
                    'path': f.get_name(),
                    'Mime/Type': f.get_mime_type(),
                    'pii': list(f.get_pii_types()),
                }
                for f in self._files
            ]
        }