#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import logging
import pprint


class Options(object):
    @staticmethod
    def join_lists_into_dict(lists_to_join):
        """
        Join several lists into one dictionary

        :param lists_to_join: is a list of lists
        [['a=b', 'c=d'], ['e=f', 'z=x'], ]

        :return: None or dictionary
        {'a': 'b', 'c': 'd', 'e': 'f', 'y': 'z'}

        """

        # lists_to_join must be a list
        if not isinstance(lists_to_join, list):
            return None

        res = {}
        # res = dict {
        #   'name1': 'value1',
        #   'name2': 'value2',
        # }
        for _list in lists_to_join:
            # _list = ['a=b', 'c=d']
            for name_value_pair in _list:
                # name_value_pair contains 'a=b'
                name, value = name_value_pair.split('=', 2)
                res[name] = value

        # return with sanity check
        if len(res) > 0:
            return res
        else:
            return None

    @staticmethod
    def join_lists(lists_to_join):
        """
        Join several lists into one
        :param lists_to_join: is a list of lists
        [['a', 'b'], ['c', 'd'], ['e', 'f']]
        :return:
        ['a', 'b', 'c', 'd', 'e', 'f']
        """

        # lists_to_join must be a list
        if not isinstance(lists_to_join, list):
            return None

        res = []
        for _list in lists_to_join:
            for _item in _list:
                res.append(_item)

        return res

    @staticmethod
    def log_level_from_string(log_level_string):
        """Convert string representation of a log level into logging.XXX constant"""

        if isinstance(log_level_string, str):
            level = log_level_string.upper()

            if level == 'CRITICAL':
                return logging.CRITICAL
            if level == 'ERROR':
                return logging.ERROR
            if level == 'WARNING':
                return logging.WARNING
            if level == 'INFO':
                return logging.INFO
            if level == 'DEBUG':
                return logging.DEBUG
            if level == 'NOTSET':
                return logging.NOTSET

        return logging.NOTSET


class CLIOptions(Options):
    """Options extracted from command line"""

    default_options = {
        #
        # general app section
        #
        'config_file': '/etc/clickhouse-mysql/clickhouse-mysql.conf',
        'log_file': None,
        'log_level': None,
        'nice_pause': None,
        'dry': False,
        'daemon': False,
        'pid_file': '/tmp/reader.pid',
        'binlog_position_file': None,
        'mempool': False,
        'mempool_max_events_num': 100000,
        'mempool_max_rows_num': 100000,
        'mempool_max_flush_interval': 60,
        'csvpool': False,
        'csvpool_file_path_prefix': '/tmp/csvpool_',
        'csvpool_keep_files': False,
        'create_table_sql_template': False,
        'create_table_sql': False,
        'with_create_database': False,
        'create_table_json_template': False,
        'migrate_table': False,
        'pump_data': False,
        'install': False,

        #
        # src section
        #
        'src_server_id': None,
        'src_host': None,
        'src_port': 3306,
        'src_user': None,
        'src_password': None,
        'src_schemas': None,
        'src_tables': None,
        'src_tables_where_clauses': None,
        'src_tables_prefixes': None,
        'src_wait': False,
        'src_resume': False,
        'src_binlog_file': None,
        'src_binlog_position': None,
        'src_file': None,

        #
        # dst section
        #
        'dst_file': None,
        'dst_host': None,
        'dst_port': 9000,
        'dst_user': 'default',
        'dst_password': '',
        'dst_schema': None,
        'dst_distribute': False,
        'dst_cluster': None,
        'dst_table': None,
        'dst_table_prefix': None,
        'dst_create_table': False,

        #
        # converters section
        #
        'column_default_value': None,
        'column_skip': [],
        'ch_converter_file': None,
        'ch_converter_class': None,
    }

    def options(self):
        """Parse application's CLI options into options dictionary
        :return: instance of Config
        """

        argparser = argparse.ArgumentParser(
            description='ClickHouse data reader',
            epilog='==============='
        )

        #
        # general app section
        #
        argparser.add_argument(
            '--config-file',
            type=str,
            default=self.default_options['config_file'],
            help='Path to config file. Default - not specified'
        )
        argparser.add_argument(
            '--log-file',
            type=str,
            default=self.default_options['log_file'],
            help='Path to log file. Default - not specified'
        )
        argparser.add_argument(
            '--log-level',
            type=str,
            default=self.default_options['log_level'],
            help='Log Level. Default - NOTSET'
        )
        argparser.add_argument(
            '--nice-pause',
            type=int,
            default=self.default_options['nice_pause'],
            help='Make specified (in sec) pause between attempts to read binlog stream'
        )
        argparser.add_argument(
            '--dry',
            action='store_true',
            help='Dry mode - do not do anything that can harm. '
                 'Useful for debugging.'
        )
        argparser.add_argument(
            '--daemon',
            action='store_true',
            help='Daemon mode - go to background.'
        )
        argparser.add_argument(
            '--pid-file',
            type=str,
            default=self.default_options['pid_file'],
            help='Pid file to be used by the app in daemon mode'
        )
        argparser.add_argument(
            '--binlog-position-file',
            type=str,
            default=self.default_options['binlog_position_file'],
            help='File to write binlog position to during bin log reading and to read position from on start'
        )
        argparser.add_argument(
            '--mempool',
            action='store_true',
            help='Cache data in mem.'
        )
        argparser.add_argument(
            '--mempool-max-events-num',
            type=int,
            default=self.default_options['mempool_max_events_num'],
            help='Max events number to pool - triggering pool flush'
        )
        argparser.add_argument(
            '--mempool-max-rows-num',
            type=int,
            default=self.default_options['mempool_max_rows_num'],
            help='Max rows number to pool - triggering pool flush'
        )
        argparser.add_argument(
            '--mempool-max-flush-interval',
            type=int,
            default=self.default_options['mempool_max_flush_interval'],
            help='Max seconds number between pool flushes'
        )
        argparser.add_argument(
            '--csvpool',
            action='store_true',
            help='Cache data in CSV pool files on disk. Requires memory pooling, '
                 'thus enables --mempool even if it is not explicitly specified'
        )
        argparser.add_argument(
            '--csvpool-file-path-prefix',
            type=str,
            default=self.default_options['csvpool_file_path_prefix'],
            help='File path prefix to CSV pool files'
        )
        argparser.add_argument(
            '--csvpool-keep-files',
            action='store_true',
            help='Keep CSV pool files. Useful for debugging'
        )
        argparser.add_argument(
            '--create-table-sql-template',
            action='store_true',
            help='Prepare CREATE TABLE SQL template(s).'
        )
        argparser.add_argument(
            '--create-table-sql',
            action='store_true',
            help='Prepare CREATE TABLE SQL statement(s).'
        )
        argparser.add_argument(
            '--with-create-database',
            action='store_true',
            help='Prepend each CREATE TABLE SQL statement(s) with CREATE DATABASE statement'
        )
        argparser.add_argument(
            '--create-table-json-template',
            action='store_true',
            help='Prepare CREATE TABLE template(s) as JSON. Useful for IPC'
        )
        argparser.add_argument(
            '--migrate-table',
            action='store_true',
            help='Migrate table(s). Copy existing data from MySQL table(s) with SELECT statement. '
                 'Binlog is not read during this procedure - just copy data from the src table(s). '
                 'IMPORTANT!. Target table has to be created in ClickHouse '
                 'or it has to be created with --dst-create-table and possibly with --with-create-database options. '
                 'See --create-table-sql-template and --create-table-sql options for additional info. '
        )
        argparser.add_argument(
            '--pump-data',
            action='store_true',
            help='Pump data from MySQL binlog into ClickHouse. Copy rows from binlog until the end of binlog reached. '
                 'When end of binlog reached, process ends. '
                 'Use in combination with --src-wait in case would like to continue and wait for new rows '
                 'after end of binlog reached'
        )
        argparser.add_argument(
            '--install',
            action='store_true',
            help='Install service file(s)'
        )

        #
        # src section
        #
        argparser.add_argument(
            '--src-server-id',
            type=int,
            default=self.default_options['src_server_id'],
            help='Set server_id to be used when reading date from MySQL src. Ex.: 1'
        )
        argparser.add_argument(
            '--src-host',
            type=str,
            default=self.default_options['src_host'],
            help='Host to be used when reading from src. Ex.: 127.0.0.1'
        )
        argparser.add_argument(
            '--src-port',
            type=int,
            default=self.default_options['src_port'],
            help='Port to be used when reading from src. Ex.: 3306'
        )
        argparser.add_argument(
            '--src-user',
            type=str,
            default=self.default_options['src_user'],
            help='Username to be used when reading from src. Ex.: root'
        )
        argparser.add_argument(
            '--src-password',
            type=str,
            default=self.default_options['src_password'],
            help='Password to be used when reading from src. Ex.: qwerty'
        )
        argparser.add_argument(
            '--src-schemas',
            type=str,
            default=self.default_options['src_schemas'],
            help='Comma-separated list of databases (a.k.a schemas) to be used when reading from src. Ex.: db1,db2,db3'
        )
        argparser.add_argument(
            '--src-tables',
            type=str,
            default=self.default_options['src_tables'],
            help='Comma-separated list of tables to be used when reading from src. '
                 'Ex.: table1,table2,table3'
                 'Ex.: db1.table1,db2.table2,db3.table3'
                 'Ex.: table1,db2.table2,table3'
        )
        argparser.add_argument(
            '--src-tables-where-clauses',
            type=str,
            default=self.default_options['src_tables_where_clauses'],
            help='Comma-separated list of WHERE clauses for tables to be migrated. '
                 'Ex.: db1.t1="a=1 and b=2",db2.t2="c=3 and k=4". '
                 'Accepts both (comma-separated) clause (useful for short clauses) or '
                 'file where clause is located (useful for long clauses)'
        )
        argparser.add_argument(
            '--src-tables-prefixes',
            type=str,
            default=self.default_options['src_tables_prefixes'],
            help='Comma-separated list of table prefixes to be used when reading from src.'
                 'Useful when we need to process unknown-in-advance tables, say day-named log tables, as log_2017_12_27'
                 'Ex.: mylog_,anotherlog_,extralog_3'
        )
        argparser.add_argument(
            '--src-wait',
            action='store_true',
            help='Wait indefinitely for new records to come.'
        )
        argparser.add_argument(
            '--src-resume',
            action='store_true',
            help='Resume reading from previous position. Previous position is read from `binlog-position-file`'
        )
        argparser.add_argument(
            '--src-binlog-file',
            type=str,
            default=self.default_options['src_binlog_file'],
            help='Binlog file to be used to read from src. Related to `binlog-position-file`. '
                 'Ex.: mysql-bin.000024'
        )
        argparser.add_argument(
            '--src-binlog-position',
            type=int,
            default=self.default_options['src_binlog_position'],
            help='Binlog position to be used when reading from src. Related to `binlog-position-file`. '
                 'Ex.: 5703'
        )
        argparser.add_argument(
            '--src-file',
            type=str,
            default=self.default_options['src_file'],
            help='Source file to read data from. CSV'
        )

        #
        # dst section
        #
        argparser.add_argument(
            '--dst-file',
            type=str,
            default=self.default_options['dst_file'],
            help='Target file to be used when writing data. CSV'
        )
        argparser.add_argument(
            '--dst-host',
            type=str,
            default=self.default_options['dst_host'],
            help='Host to be used when writing to dst. Ex.: 127.0.0.1'
        )
        argparser.add_argument(
            '--dst-port',
            type=int,
            default=self.default_options['dst_port'],
            help='Port to be used when writing to dst. Ex.: 9000'
        )
        argparser.add_argument(
            '--dst-user',
            type=str,
            default=self.default_options['dst_user'],
            help='Username to be used when writing to dst. Ex: default'
        )
        argparser.add_argument(
            '--dst-password',
            type=str,
            default=self.default_options['dst_password'],
            help='Password to be used when writing to dst. Ex.: qwerty'
        )
        argparser.add_argument(
            '--dst-schema',
            type=str,
            default=self.default_options['dst_schema'],
            help='Database (a.k.a schema) to be used to create tables in ClickHouse. '
                 'It overwrites source database(s) name(s), so tables in ClickHouse '
                 'would be located in differently named db than in MySQL. '
                 'Ex.: db1'
        )
        argparser.add_argument(
            '--dst-distribute',
            action='store_true',
            default=self.default_options['dst_distribute'],
            help='Whether to add distribute table'
        )
        argparser.add_argument(
            '--dst-cluster',
            type=str,
            default=self.default_options['dst_cluster'],
            help='Cluster to be used when writing to dst. Ex.: cluster1'
        )
        argparser.add_argument(
            '--dst-table',
            type=str,
            default=self.default_options['dst_table'],
            help='Table to be used when writing to dst. Ex.: table1'
        )
        argparser.add_argument(
            '--dst-table-prefix',
            type=str,
            default=self.default_options['dst_table_prefix'],
            help='Prefix to be used when creating dst table. Ex.: copy_table_'
        )
        argparser.add_argument(
            '--dst-create-table',
            action='store_true',
            help='Prepare and run CREATE TABLE SQL statement(s).'
        )

        #
        # converters section
        #
        argparser.add_argument(
            '--column-default-value',
            type=str,
            nargs='*',
            action='append',
            default=self.default_options['column_default_value'],
            help='Set of key=value pairs for columns default values. '
                 'Ex.: date_1=2000-01-01 timestamp_1=2002-01-01\ 01:02:03'
        )
        argparser.add_argument(
            '--column-skip',
            type=str,
            nargs='*',
            action='append',
            default=self.default_options['column_skip'],
            help='Set of column names to skip. Ex.: column1 column2'
        )
        argparser.add_argument(
            '--ch-converter-file',
            type=str,
            default=self.default_options['ch_converter_file'],
            help='Filename where to search for CH converter class'
        )
        argparser.add_argument(
            '--ch-converter-class',
            type=str,
            default=self.default_options['ch_converter_class'],
            help='Converter class name in --ch-converter-file file'
        )

        args = argparser.parse_args()

        return {
            #
            # general app section
            #
            'config_file': args.config_file,
            'log_file': args.log_file,
            'log_level': args.log_level,
            'nice_pause': args.nice_pause,
            'dry': args.dry,
            'daemon': args.daemon,
            'pid_file': args.pid_file,
            'binlog_position_file': args.binlog_position_file,
            'mempool': args.mempool, # csvpool assumes mempool to be enabled
            'mempool_max_events_num': args.mempool_max_events_num,
            'mempool_max_rows_num': args.mempool_max_rows_num,
            'mempool_max_flush_interval': args.mempool_max_flush_interval,
            'csvpool': args.csvpool,
            'csvpool_file_path_prefix': args.csvpool_file_path_prefix,
            'csvpool_keep_files': args.csvpool_keep_files,
            'create_table_sql_template': args.create_table_sql_template,
            'create_table_sql': args.create_table_sql,
            'with_create_database': args.with_create_database,
            'create_table_json_template': args.create_table_json_template,
            'migrate_table': args.migrate_table,
            'pump_data': args.pump_data,
            'install': args.install,

            #
            # src section
            #
            'src_server_id': args.src_server_id,
            'src_host': args.src_host,
            'src_port': args.src_port,
            'src_user': args.src_user,
            'src_password': args.src_password,
            'src_schemas': [x for x in args.src_schemas.split(',') if x] if args.src_schemas else self.default_options['src_schemas'],
            'src_tables': [x for x in args.src_tables.split(',') if x] if args.src_tables else self.default_options['src_tables'],
            'src_tables_where_clauses': [x for x in args.src_tables_where_clauses.split(',') if x] if args.src_tables_where_clauses else self.default_options['src_tables_where_clauses'],
            'src_tables_prefixes': [x for x in args.src_tables_prefixes.split(',') if x] if args.src_tables_prefixes else self.default_options['src_tables_prefixes'],
            'src_wait': args.src_wait,
            'src_resume': args.src_resume,
            'src_binlog_file': args.src_binlog_file,
            'src_binlog_position': args.src_binlog_position,
            'src_file': args.src_file,

            #
            # dst section
            #
            'dst_file': args.dst_file,
            'dst_host': args.dst_host,
            'dst_port': args.dst_port,
            'dst_user': args.dst_user,
            'dst_password': args.dst_password,
            'dst_schema': args.dst_schema,
            'dst_distribute': args.dst_distribute,
            'dst_cluster': args.dst_cluster,
            'dst_table': args.dst_table,
            'dst_table_prefix': args.dst_table_prefix,
            'dst_create_table': args.dst_create_table,

            #
            # converters section
            #
            'column_default_value': CLIOptions.join_lists_into_dict(args.column_default_value),
            'column_skip': CLIOptions.join_lists(args.column_skip),
            'ch_converter_file': args.ch_converter_file,
            'ch_converter_class': args.ch_converter_class,
        }

from configobj import ConfigObj


class ConfigFileOptions(Options):
    """Options extracted from configuration files"""

    @staticmethod
    def options(filename):

        #
        def transform(section, key):
            new_key = key.replace('-', '_')
            section.rename(key, new_key)

        # fetch base config
        try:
            base_config = ConfigObj(
                infile='/etc/clickhouse-mysql/config.ini',
                encoding="utf-8",
                default_encoding="utf-8",
                list_values=True,
                create_empty=False,  # create empty config file
                stringify=True,
                raise_errors=False,
                file_error=False,
            )
        except:
            base_config = None

        # fetch user config
        try:
            user_config = ConfigObj(
                filename,
                encoding="utf-8",
                default_encoding="utf-8",
                list_values=True,
                create_empty=False,  # create empty config file
                stringify=True,
                raise_errors=False,
                file_error=False,
            )
        except:
            user_config = None

        # merge base and user configs
        # user config has priority over base config

        if base_config and user_config:
            base_config.merge(user_config)
            base_config.walk(transform, call_on_sections=True)
            return base_config

        if base_config:
            base_config.walk(transform, call_on_sections=True)
            return base_config

        if user_config:
            user_config.walk(transform, call_on_sections=True)
            return user_config

        return None


class AggregatedOptions(object):
    """Aggregated and prioritized options"""

    cli_opts = None
    cfg_opts = None
    env_opts = None

    cli = None

    def __init__(self):
        """Build aggregated options"""
        self.cli = CLIOptions()

        self.cli_opts = self.cli.options()
        self.cfg_opts = ConfigFileOptions.options(self.cli_opts['config_file'])

    def get_from_src(self, src, *coordinates):
        """Fetch an option by specified coordinates from provided source"""

        first_iteration = True
        for coordinate in coordinates:
            try:
                section = src[coordinate] if first_iteration else section[coordinate]
            except:
                return None
            first_iteration = False

        return section

    def get(self, *coordinates):
        """
        Fetch an option by specified coordinates according to source priorities.
        Priority would be:
        1. config (lower priority)
        2. CLI opts
        """
        cfg_opt = self.get_from_src(self.cfg_opts, *coordinates)
        cli_opt = self.get_from_src(self.cli_opts, *coordinates)
        cli_def = self.get_from_src(self.cli.default_options, *coordinates)

        if cli_opt != cli_def:
            # CLI opt is set - it is not default one - top priority
            return cli_opt

        # here CLI option is a default one

        if cfg_opt is not None:
            # cfg opt - is set lower priority
            return cfg_opt

        # option not available - return CLI default
        return cli_def

    def get_int(self, *coordinates):
        value = self.get(*coordinates)
        if value is not None:
            value = int(value)
        return value

    def get_list(self, *coordinates):
        value = self.get(*coordinates)

        # return None as it is
        if value is None:
            return None

        # return list-like type as it is
        if isinstance(value, (list, set, dict, tuple)):
            return value

        # wrap value in a list
        return [value]

    def get_bool(self, *coordinates):
        value = self.get(*coordinates)

        if value is None:
            # None is not interpreted
            return None
        elif isinstance(value, bool):
            # bool is ready-to-use
            return value
        elif isinstance(value, str):
            # str can be interpreted as "yes", "1", "on"
            value = value.upper()
            if (value == '1') or (value == 'YES') or (value == 'ON'):
                return True
            else:
                return False
        else:
            # int and all the rest just cast into bool
            return bool(value)

    def __getitem__(self, coordinates_tuple):
        if isinstance(coordinates_tuple, tuple):
            return self.get(*coordinates_tuple)
        else:
            return self.get(coordinates_tuple)

    def __str__(self):
        str = 'OPTIONS:\n'
        if self.cli_opts:
            str += 'CLI: =================\n'
            str += pprint.pformat(self.cli_opts)
            str += '\n'

        if self.cfg_opts:
            dict = self.cfg_opts.walk(lambda section, key: section[key])
            str += 'CFG: =================\n'
            str += pprint.pformat(dict)
            str += '\n'

        return str