# -*- coding: utf-8 -*-

import functools
import logging
import re
import time

import apt_pkg
import yaml

from aptly.decorators import CachedMethod
from aptly.exceptions import AptlyException, NoSuchPublish

lg = logging.getLogger(__name__)


def load_publish(publish):
    """
    Load a saved publish definition from a YAML file

    :param publish: path of the YAML file to read
    :return: the deserialized YAML document
    """
    with open(publish, 'r') as publish_file:
        # NOTE(review): yaml.Loader is able to construct arbitrary Python
        # objects, so only files from a trusted source must be restored.
        # The loader is named explicitly because newer PyYAML releases
        # refuse to call load() without one.
        return yaml.load(publish_file, Loader=yaml.Loader)


class PublishManager(object):
    """
    Manage multiple publishes
    """
    def __init__(self, client, storage=""):
        # Publishes are created lazily by publish() and kept per distribution
        self._publishes = {}
        self.client, self.storage = client, storage
        # One timestamp handed to every publish created by this manager
        self.timestamp = int(time.time())

    def publish(self, distribution, storage=""):
        """
        Return the publish kept for the distribution, creating it on first use
        """
        if distribution not in self._publishes:
            self._publishes[distribution] = Publish(
                self.client,
                distribution,
                timestamp=self.timestamp,
                storage=(storage or self.storage),
            )
        return self._publishes[distribution]

    def add(self, snapshot, distributions, component='main', storage=""):
        """ Add mirror or repo to publish """
        for dist in distributions:
            self.publish(dist, storage=storage).add(snapshot, component)

    def restore_publish(self, components, restore_file, recreate):
        """
        Restore a publish from a YAML file holding a saved publish

        The 'publish' key of the file names the publish, the optional
        'storage' key overrides the storage of this manager.
        """
        saved = load_publish(restore_file)
        target = Publish(
            self.client,
            saved.get('publish'),
            storage=saved.get('storage', self.storage),
        )
        target.restore_publish(saved, components=components, recreate=recreate)

    def dump_publishes(self, publishes_to_save, dump_dir, prefix):

        if len(dump_dir) > 1 and dump_dir[-1] == '/':
            dump_dir = dump_dir[:-1]

        save_list = []
        save_all = True

        if publishes_to_save and not ('all' in publishes_to_save):
            save_all = False

        re_publish = None
        if len(publishes_to_save) == 1 and re.search(r'\(.*\)', publishes_to_save[0]):
            re_publish = re.compile(publishes_to_save[0])

        publishes = self.client.do_get('/publish')
        for publish in publishes:
            name = "{}{}{}".format(publish['Storage']+":" if publish['Storage']
                                else "", publish['Prefix']+"/" if
                                publish['Prefix'] else "",
                                publish['Distribution'])

            if not re_publish or re_publish.match(name):
                if save_all or name in publishes_to_save or re_publish:
                    current_publish = Publish(self.client, name, load=True, storage=publish.get('Storage', self.storage))
                    if current_publish not in save_list:
                        save_list.append(current_publish)

        if not save_all and not re_publish and len(save_list) != len(publishes_to_save):
            raise Exception('Publish(es) required not found')

        for publish in save_list:
            storage = '' if not publish.storage else '-{}-'.format(publish.storage)
            save_path = ''.join([dump_dir, '/', prefix, storage, publish.name.replace('/', '-'), '.yml'])
            publish.save_publish(save_path)

    def _publish_match(self, publish, names=False, name_only=False):
        """
        Check if publish name matches list of names or regex patterns
        """
        if names:
            for name in names:
                if not name_only and isinstance(name, re._pattern_type):
                    if re.match(name, publish.name):
                        return True
                else:
                    operand = name if name_only else [name, './%s' % name]
                    if publish in operand:
                        return True
            return False
        else:
            return True

    def do_publish(self, *args, **kwargs):
        try:
            publish_dist = kwargs.pop('dist')
        except KeyError:
            publish_dist = None

        try:
            publish_names = kwargs.pop('names')
        except KeyError:
            publish_names = None

        for publish in self._publishes.values():
            if self._publish_match(publish.name, publish_names or publish_dist, publish_names):
                publish.do_publish(*args, **kwargs)
            else:
                lg.info("Skipping publish %s not matching publish names" % publish.name)

    def list_uniq(self, seq):
        """
        Return the distinct items of seq as a list
        """
        return list(dict.fromkeys(seq, 1))

    def do_purge(self, config, components=[], hard_purge=False):
        (repo_dict, publish_dict) = self.get_repo_information(config, self.client, hard_purge, components)
        publishes = self.client.do_get('/publish')
        publish_list = []

        for publish in publishes:
            name = '{}/{}'.format(publish['Prefix'].replace("/", "_"), publish['Distribution'])
            publish_list.append(Publish(self.client, name, load=True))

        for publish in publish_list:
            repo_dict = publish.purge_publish(repo_dict, publish_dict, components, publish=True)

        if hard_purge:
            self.remove_unused_packages(repo_dict)
            self.cleanup_snapshots()

    @staticmethod
    def get_repo_information(config, client, fill_repo=False, components=[]):
        """ fill two dictionnaries : one containing all the packages for every repository
            and the second one associating to every component of every publish its repository"""
        repo_dict = {}
        publish_dict = {}

        for origin in ['repo', 'mirror']:
            for name, repo in config.get(origin, {}).items():
                if components and repo.get('component') not in components:
                    continue
                if fill_repo and origin == 'repo':
                    packages = Publish._get_packages("repos", name)
                    repo_dict[name] = packages
                for distribution in repo.get('distributions'):
                    publish_name = str.join('/', distribution.split('/')[:-1])
                    publish_dict[(publish_name, repo.get('component'))] = name

        return (repo_dict, publish_dict)

    def remove_unused_packages(self, repo_dict):
        for repo_name, packages in repo_dict.items():
            if packages:
                self.client.do_delete('/repos/%s/packages' % repo_name, data={'PackageRefs': packages})

    def cleanup_snapshots(self):
        # requesting graph.dot always works, even if graphviz (dot) is not installed
        dot_data = self.client.do_get('/graph.dot')

        # extract edges from dot-data
        # edges = list of pairs with (node_id, node_id)
        edges = re.findall('"([^"]+)"->"([^"]+)";', dot_data)

        # extract nodes from dot-data
        # list of tuples with (node_id, node_type, node_name)
        # node_type is one of 'Repo'|'Snapshot'|'Publish'
        nodes_raw = re.findall('[ \t]+"([^"]+)".*label="{(Repo|Snapshot|Published) ([^|]+)[^\}"]+}"', dot_data)

        # convert nodes_raw into nodes
        # nodes = dict of node_id to node
        # node = (node_type, node_name)
        nodes = {node_id: (node_type, node_name) for node_id, node_type, node_name in nodes_raw}

        # start with published and flood fill other nodes
        published_node_ids = [key for key in nodes if nodes[key][0] == 'Published']

        # do flood fill
        while True:
            # get a list of source node_ids of incoming edges
            incoming_node_ids = [edge_from for edge_from, edge_to in edges if edge_to in published_node_ids and edge_from not in published_node_ids]

            # add incoming node_ids
            published_node_ids.extend(incoming_node_ids)

            # no new nodes, we are finished
            if len(incoming_node_ids) == 0:
                break

        # we got published nodes, invert the set
        non_published_node_ids = [node_id for node_id in nodes if node_id not in published_node_ids]

        # list of nodes with (node_type, node_name)
        unreleased_nodes = [nodes[node_id] for node_id in non_published_node_ids]

        # use only nodes of type 'Snapshot'
        unreleased_snapshots = [node[1] for node in unreleased_nodes if node[0] == 'Snapshot']

        # actually delete snapshots that are not published
        for snapshot in unreleased_snapshots:
            lg.info("Deleting snapshot %s" % snapshot)
            try:
                self.client.do_delete('/snapshots/%s' % snapshot)
            except AptlyException as e:
                if e.res.status_code == 409:
                    lg.warning("Snapshot %s is being used, can't delete" % snapshot)
                else:
                    raise


class Publish(object):
    """
    Single publish object
    """
    def __init__(self, client, distribution, timestamp=None, recreate=False, load=False, merge_prefix='_', storage="", architectures=[]):
        self.client = client
        self.recreate = recreate
        self.architectures = architectures

        # Try to get storage from distribution (eg. s3:mys3:xenial)
        dist_split = distribution.split(':')
        if len(dist_split) > 1:
            self.storage = "{}:{}".format(dist_split[0], dist_split[1])
            distribution = dist_split[-1]
        else:
            self.storage = storage

        dist_split = distribution.split('/')
        self.distribution = dist_split[-1]
        if dist_split[0] != self.distribution:
            self.prefix = "_".join(dist_split[:-1])
        else:
            self.prefix = ''

        self.name = '%s/%s' % (self.prefix or '.', self.distribution)
        self.full_name = "{}{}{}".format(self.storage+":" if self.storage else
                                         "", self.prefix+"/" if self.prefix else
                                         "", self.distribution)

        if not timestamp:
            self.timestamp = int(time.time())
        else:
            self.timestamp = timestamp

        self.merge_prefix = merge_prefix
        self.components = {}
        self.publish_snapshots = []

        if load:
            # Load information from remote immediately
            self.load()

    def __eq__(self, other):
        if not isinstance(other, Publish):
            return False

        diff, equal = self.compare(other)
        if not diff:
            return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def compare(self, other, components=[]):
        """
        Compare two publishes
        It expects that other publish is same or older than this one

        Return tuple (diff, equal) of dict {'component': ['snapshot']}
        """
        lg.debug("Comparing publish %s (%s) and %s (%s)" % (self.name, self.storage or "local", other.name, other.storage or "local"))

        diff, equal = ({}, {})

        for component, snapshots in self.components.items():
            if component not in list(other.components.keys()):
                # Component is missing in other
                diff[component] = snapshots
                continue

            equal_snapshots = list(set(snapshots).intersection(other.components[component]))
            if equal_snapshots:
                lg.debug("Equal snapshots for %s: %s" % (component, equal_snapshots))
                equal[component] = equal_snapshots

            diff_snapshots = list(set(snapshots).difference(other.components[component]))
            if diff_snapshots:
                lg.debug("Different snapshots for %s: %s" % (component, diff_snapshots))
                diff[component] = diff_snapshots

        return (diff, equal)

    def get_component_snapshot(self, component):
        if component in self.components.keys():
            return self.components[component][0]
        return ""

    def replace_snapshot(self, component, new_snapshot):
        newlist = []
        if component in self.components.keys():
            for snapshot in self.publish_snapshots:
                if snapshot["Name"] not in self.components[component] and snapshot not in newlist:
                    newlist.append(snapshot)
        else:
            newlist = self.publish_snapshots
        newlist.append({
            'Component': component,
            'Name': new_snapshot
        })

        self.components[component] = [new_snapshot]
        self.publish_snapshots = newlist

    def create_snapshot_from_packages(self, packages, name, description):
        self.client.do_post(
            '/snapshots',
            data={
                'Name': name,
                'SourceSnapshots': [],
                'Description': description,
                'PackageRefs': packages,
            }
        )

    @staticmethod
    @CachedMethod
    def _get_packages(client, source_type, source_name):
        """
        Fetch the packages of a source, source_type is the first URL
        element (callers pass "repos" or "snapshots")
        """
        url = '/{}/{}/packages'.format(source_type, source_name)
        return client.do_get(url)

    @staticmethod
    @CachedMethod
    def _get_publishes(client):
        """
        Fetch the list of publishes from remote
        """
        publishes = client.do_get('/publish')
        return publishes

    @staticmethod
    @CachedMethod
    def _get_snapshots(client):
        """
        Fetch the list of snapshots from remote, requested sorted by time
        """
        query = {'sort': 'time'}
        return client.do_get('/snapshots', query)

    def _get_publish(self):
        """
        Find this publish on remote
        """
        publishes = self._get_publishes(self.client)
        for publish in publishes:
            if publish['Distribution'] == self.distribution and \
                    publish['Prefix'].replace("/", "_") == (self.prefix or '.') and \
                    publish['Storage'] == self.storage:
                return publish
        raise NoSuchPublish("Publish %s (%s) does not exist" % (self.name, self.storage or "local"))

    def _remove_snapshots(self, snapshots):
        for snapshot in snapshots:
            self.client.do_delete('/snapshots/%s' % snapshot)

    def save_publish(self, save_path):
        """
        Serialize publish in YAML

        The file holds the publish name, a timestamp, the storage and, for
        every component, its packages together with name and description
        of the first snapshot of the component.
        """
        timestamp = time.strftime("%Y%m%d%H%M%S")

        dumped_components = []
        for component, snapshots in self.components.items():
            package_entries = []
            for package_ref in self.get_packages(component):
                (arch, package_name, version, ref) = self.parse_package_ref(package_ref)
                package_entries.append({'package': package_name, 'version': version, 'arch': arch, 'ref': ref})

            snapshot = self._find_snapshot(snapshots[0])
            dumped_components.append({
                'component': component,
                'snapshot': snapshot['Name'],
                'description': snapshot['Description'],
                'packages': package_entries,
            })

        yaml_dict = {
            "publish": self.name,
            "name": timestamp,
            "components": dumped_components,
            "storage": self.storage,
        }

        lg.info("Saving publish %s in %s" % (self.name.replace('/', '-'), save_path))
        with open(save_path, 'w') as save_file:
            yaml.dump(yaml_dict, save_file, default_flow_style=False)

    def purge_publish(self, repo_dict, publish_dict, components=None, publish=False):
        """
        Reduce the published snapshots to one version per package name

        For every published snapshot whose (publish prefix, component) is
        known in publish_dict, the packages are sorted by version (newest
        first) and only the first package of each name is kept. When
        packages were dropped, a snapshot named '<snapshot>-purged' is
        created and used instead.

        :param repo_dict: repository name -> list of package refs; kept
            packages are removed from these lists
        :param publish_dict: (publish prefix, component) -> repository name
        :param components: only purge these components, an empty value
            means all of them
        :param publish: publish right away when the snapshots changed
        :return: the updated repo_dict
        :raises Exception: when creating a purged snapshot fails with 404
        """
        apt_pkg.init_system()
        # sorted() has no cmp argument on Python 3, wrap the comparison
        # function into a sort key instead
        version_key = functools.cmp_to_key(apt_pkg.version_compare)

        new_publish_snapshots = []
        location = self.name.split('/')[0].replace('_', '/')

        for snapshot in self.publish_snapshots:
            name = snapshot["Name"]
            component = snapshot["Component"]

            if (location, component) not in publish_dict:
                # No source known for this component, keep it untouched
                new_publish_snapshots.append(snapshot)
                continue
            repo_name = publish_dict[(location, component)]

            if components and component not in components:
                new_publish_snapshots.append(snapshot)
                if repo_dict:
                    repo_dict[repo_name] = []
                continue

            packages = self._get_packages(self.client, "snapshots", name)
            packages = sorted(packages,
                              key=lambda ref: version_key(self.parse_package_ref(ref)[2]),
                              reverse=True)

            # package names already seen, i.e. whose newest version is kept
            processed = set()
            purge_packages = []
            for package in packages:
                package_name = self.parse_package_ref(package)[1]
                if package_name in processed:
                    continue
                processed.add(package_name)

                if repo_dict and repo_name in repo_dict and package in repo_dict[repo_name]:
                    repo_dict[repo_name].remove(package)
                purge_packages.append(package)

            if purge_packages == packages:
                # Nothing was dropped, keep publishing the same snapshot
                new_publish_snapshots.append(snapshot)
                continue

            snapshot_name = '{}-{}'.format(name, 'purged')
            try:
                lg.debug("Creating new snapshot: %s" % snapshot_name)
                self.client.do_post(
                    '/snapshots',
                    data={
                        'Name': snapshot_name,
                        'SourceSnapshots': [],
                        'Description': 'Minimal snapshot from {}'.format(repo_name),
                        'PackageRefs': purge_packages,
                    }
                )
            except AptlyException as e:
                if e.res.status_code == 404:
                    raise Exception('Error while creating snapshot : {}'.format(repr(e)))
                else:
                    lg.debug("Snapshot %s already exist" % snapshot_name)

            new_publish_snapshots.append({
                'Component': component,
                'Name': snapshot_name
            })

        if self.publish_snapshots != new_publish_snapshots:
            self.publish_snapshots = new_publish_snapshots
            if publish:
                self.do_publish(recreate=False, merge_snapshots=False)

        return repo_dict

    def restore_publish(self, config, components, recreate=False):
        """
        Restore publish from config file
        """
        if "all" in components:
            components = []

        try:
            self.load()
            publish = True
        except NoSuchPublish:
            publish = False

        new_publish_snapshots = []
        to_publish = []
        created_snapshots = []

        for saved_component in config.get('components', []):
            component_name = saved_component.get('component')

            if not component_name:
                raise Exception("Corrupted file")

            if components and component_name not in components:
                continue

            saved_packages = []
            if not saved_component.get('packages'):
                raise Exception("Component %s is empty" % component_name)

            for package in saved_component.get('packages'):
                package_ref = '{} {} {} {}'.format(package.get('arch'), package.get('package'), package.get('version'), package.get('ref'))
                saved_packages.append(package_ref)

            to_publish.append(component_name)

            timestamp = time.strftime("%Y%m%d%H%M%S")
            snapshot_name = '{}-{}-{}'.format("restored", timestamp, saved_component.get('snapshot'))
            lg.debug("Creating snapshot %s for component %s of packages: %s"
                     % (snapshot_name, component_name, saved_packages))

            try:
                self.client.do_post(
                    '/snapshots',
                    data={
                        'Name': snapshot_name,
                        'SourceSnapshots': [],
                        'Description': saved_component.get('description'),
                        'PackageRefs': saved_packages,
                    }
                )
                created_snapshots.append(snapshot_name)
            except AptlyException as e:
                if e.res.status_code == 404:
                    # delete all the previously created
                    # snapshots because the file is corrupted
                    self._remove_snapshots(created_snapshots)
                    raise Exception("Source snapshot or packages don't exist")
                else:
                    raise

            new_publish_snapshots.append({
                'Component': component_name,
                'Name': snapshot_name
            })

        if components:
            self.publish_snapshots = [x for x in self.publish_snapshots if x['Component'] not in components and x['Component'] not in to_publish]
            check_components = [x for x in new_publish_snapshots if x['Component'] in components]
            if len(check_components) != len(components):
                self._remove_snapshots(created_snapshots)
                raise Exception("Not possible to find all the components required in the backup file")

        self.publish_snapshots += new_publish_snapshots
        self.do_publish(recreate=recreate, merge_snapshots=False)

    def load(self):
        """
        Load publish info from remote
        """
        publish = self._get_publish()
        self.architectures = publish['Architectures']
        for source in publish['Sources']:
            component = source['Component']
            snapshot = source['Name']
            self.publish_snapshots.append({
                'Component': component,
                'Name': snapshot
            })

            snapshot_remote = self._find_snapshot(snapshot)
            for source in self._get_source_snapshots(snapshot_remote, fallback_self=True):
                self.add(source, component)

    def get_packages(self, component=None, components=[], packages=None):
        """
        Return package refs for given components
        """
        if component:
            components = [component]

        package_refs = []
        for snapshot in self.publish_snapshots:
            if component and snapshot['Component'] not in components:
                # We don't want packages for this component
                continue

            component_refs = self._get_packages(self.client, "snapshots", snapshot['Name'])
            if packages:
                # Filter package names
                for ref in component_refs:
                    if self.parse_package_ref(ref)[1] in packages:
                        package_refs.append(ref)
            else:
                package_refs.extend(component_refs)

        return package_refs

    def parse_package_ref(self, ref):
        """
        Return tuple of architecture, package_name, version, id
        """
        if not ref:
            return None
        parsed = re.match('(.*)\ (.*)\ (.*)\ (.*)', ref)
        return parsed.groups()

    def add(self, snapshot, component='main'):
        """
        Add snapshot of component to publish
        """
        try:
            self.components[component].append(snapshot)
        except KeyError:
            self.components[component] = [snapshot]

    def _find_snapshot(self, name):
        """
        Find snapshot on remote by name or regular expression
        """
        remote_snapshots = self._get_snapshots(self.client)
        for remote in reversed(remote_snapshots):
            if remote["Name"] == name or \
                    re.match(name, remote["Name"]):
                return remote
        return None

    def _get_source_snapshots(self, snapshot, fallback_self=False):
        """
        Get list of source snapshot names of given snapshot

        TODO: we have to decide by description at the moment
        """
        if not snapshot:
            return []

        source_snapshots = re.findall(r"'([\w\d\.-]+)'", snapshot['Description'])
        if not source_snapshots and fallback_self:
            source_snapshots = [snapshot['Name']]

        source_snapshots.sort()
        return source_snapshots

    def merge_snapshots(self):
        """
        Create component snapshots by merging other snapshots of same component
        """
        self.publish_snapshots = []
        for component, snapshots in self.components.items():
            if len(snapshots) <= 1:
                # Only one snapshot, no need to merge
                lg.debug("Component %s has only one snapshot %s, not creating merge snapshot" % (component, snapshots))
                self.publish_snapshots.append({
                    'Component': component,
                    'Name': snapshots[0]
                })
                continue

            # Look if merged snapshot doesn't already exist
            remote_snapshot = self._find_snapshot(r'^%s%s-%s-\d+' % (self.merge_prefix, self.name.replace('./', '').replace('/', '-'), component))
            if remote_snapshot:
                source_snapshots = self._get_source_snapshots(remote_snapshot)

                # Check if latest merged snapshot has same source snapshots like us
                snapshots_want = list(snapshots)
                snapshots_want.sort()

                lg.debug("Comparing snapshots: snapshot_name=%s, snapshot_sources=%s, wanted_sources=%s" % (remote_snapshot['Name'], source_snapshots, snapshots_want))
                if snapshots_want == source_snapshots:
                    lg.info("Remote merge snapshot already exists: %s (%s)" % (remote_snapshot['Name'], source_snapshots))
                    self.publish_snapshots.append({
                        'Component': component,
                        'Name': remote_snapshot['Name']
                    })
                    continue

            snapshot_name = '%s%s-%s-%s' % (self.merge_prefix, self.name.replace('./', '').replace('/', '-'), component, self.timestamp)
            lg.info("Creating merge snapshot %s for component %s of snapshots %s" % (snapshot_name, component, snapshots))
            package_refs = []
            for snapshot in snapshots:
                # Get package refs from each snapshot
                packages = self._get_packages(self.client, "snapshots", snapshot)
                package_refs.extend(packages)

            try:
                self.client.do_post(
                    '/snapshots',
                    data={
                        'Name': snapshot_name,
                        'SourceSnapshots': snapshots,
                        'Description': "Merged from sources: %s" % ', '.join("'%s'" % snap for snap in snapshots),
                        'PackageRefs': package_refs,
                    }
                )
            except AptlyException as e:
                if e.res.status_code == 400:
                    lg.warning("Error creating snapshot %s, assuming it already exists" % snapshot_name)
                else:
                    raise

            self.publish_snapshots.append({
                'Component': component,
                'Name': snapshot_name
            })

    def drop_publish(self):
        lg.info("Deleting publish, distribution=%s, storage=%s" % (self.name, self.storage or "local"))
        self.client.do_delete('/publish/%s' % (self.full_name))

    def update_publish(self, force_overwrite=False, publish_contents=False,
                       acquire_by_hash=True):
        lg.info("Updating publish, distribution=%s storage=%s snapshots=%s" %
                (self.name, self.storage or "local", self.publish_snapshots))
        self.client.do_put(
            '/publish/%s' % (self.full_name),
            {
                'Snapshots': self.publish_snapshots,
                'ForceOverwrite': force_overwrite,
                'SkipContents': not publish_contents,
                'AcquireByHash': acquire_by_hash,
            }
        )

    def create_publish(self, force_overwrite=False, publish_contents=False,
                       architectures=None, acquire_by_hash=True):
        lg.info("Creating new publish, distribution=%s storage=%s snapshots=%s, architectures=%s" %
                (self.name, self.storage or "local", self.publish_snapshots, architectures))

        if self.prefix:
            prefix = '%s%s' % ("/"+self.storage+":" or "/", self.prefix)
        else:
            prefix = '%s' % ("/"+self.storage+":" or "")

        opts = {
            "Storage": self.storage,
            "SourceKind": "snapshot",
            "Distribution": self.distribution,
            "Sources": self.publish_snapshots,
            "ForceOverwrite": force_overwrite,
            'SkipContents': not publish_contents,
            'AcquireByHash': acquire_by_hash,
        }

        if architectures or self.architectures:
            opts['Architectures'] = architectures or self.architectures

        self.client.do_post(
            '/publish%s' % (prefix or ''),
            opts
        )

    def do_publish(self, recreate=False, no_recreate=False,
                   force_overwrite=False, publish_contents=False,
                   acquire_by_hash=False, architectures=None,
                   merge_snapshots=True, only_latest=False, config=None,
                   components=[]):
        if merge_snapshots:
            self.merge_snapshots()
        try:
            publish = self._get_publish()
        except NoSuchPublish:
            publish = False

        if only_latest:
            (_, publish_dict) = PublishManager.get_repo_information(config, self.client)
            self.purge_publish([], publish_dict, components, False)

        if not publish:
            # New publish
            self.create_publish(force_overwrite, publish_contents, architectures
                                or self.architectures, acquire_by_hash)
        else:
            # Test if publish is up to date
            to_publish = [x['Name'] for x in self.publish_snapshots]
            published = [x['Name'] for x in publish['Sources']]

            to_publish.sort()
            published.sort()

            if recreate:
                lg.info("Recreating publish %s (%s)" % (self.name, self.storage or "local"))
                self.drop_publish()
                self.create_publish(force_overwrite, publish_contents,
                                    architectures or self.architectures,
                                    acquire_by_hash)
            elif to_publish == published:
                lg.info("Publish %s (%s) is up to date" % (self.name, self.storage or "local"))
            else:
                try:
                    self.update_publish(force_overwrite, publish_contents,
                                        acquire_by_hash)
                except AptlyException as e:
                    if e.res.status_code == 404:
                        # Publish exists but we are going to add some new
                        # components. Unfortunately only way is to recreate it
                        if no_recreate:
                            lg.error("Cannot update publish %s (adding new components?), falling back to recreating it is disabled so skipping." % self.full_name)
                        else:
                            lg.warning("Cannot update publish %s (adding new components?), falling back to recreating it" % self.full_name)
                            self.drop_publish()
                            self.create_publish(force_overwrite,
                                                publish_contents,
                                                architectures or self.architectures)
                    else:
                        raise