import codecs
import fnmatch
import logging
import os
import re

import utils.misc
from utils.features import ConfigFeature

logger = logging.getLogger('crawlutils')


def crawl_config_files(
    root_dir='/',
    exclude_dirs=None,
    root_dir_alias=None,
    known_config_files=None,
    discover_config_files=False,
    accessed_since=0
):
    """Yield one 'config' feature per selected config file under root_dir.

    A file is selected when it is listed in known_config_files or, if
    discover_config_files is true, added by discover_config_file_paths(),
    and its lstat atime or ctime is greater than accessed_since.

    :param root_dir: directory to crawl; nothing is yielded if it is not
        a directory.
    :param exclude_dirs: iterable of paths (joined onto root_dir with
        utils.misc.join_abs_paths and translated with fnmatch.translate)
        whose matches are skipped. None means no exclusions.
    :param root_dir_alias: replaces the first occurrence of root_dir in
        each reported path; defaults to root_dir.
    :param known_config_files: iterable of config file paths, joined
        onto root_dir. The caller's sequence is not modified. None means
        no known files.
    :param discover_config_files: when true, also walk root_dir looking
        for files that look like config files.
    :param accessed_since: timestamp compared against st_atime/st_ctime.
    :returns: generator of (frelpath, ConfigFeature, 'config') tuples.
    """
    # Must stay the first statement so only the arguments are captured.
    saved_args = locals()
    logger.debug('Crawling config files: %s', saved_args)

    if not os.path.isdir(root_dir):
        return

    root_dir_alias = root_dir_alias or root_dir
    exclude_paths = [utils.misc.join_abs_paths(root_dir, d)
                     for d in (exclude_dirs or [])]
    # With no exclusions fall back to r'$.', which is meant to match nothing.
    exclude_regex = r'|'.join([fnmatch.translate(d)
                               for d in exclude_paths]) or r'$.'

    # Work on a private list: the caller's known_config_files (or a shared
    # default) must not be rewritten, otherwise a list reused across calls
    # would be joined onto root_dir again on every call.
    candidate_paths = [utils.misc.join_abs_paths(root_dir, f)
                       for f in (known_config_files or [])]
    candidate_paths = [f for f in candidate_paths
                       if not re.match(exclude_regex, f)]

    config_file_set = set()
    for fpath in candidate_paths:
        if os.path.exists(fpath):
            lstat = os.lstat(fpath)
            if (lstat.st_atime > accessed_since or
                    lstat.st_ctime > accessed_since):
                config_file_set.add(fpath)

    if discover_config_files:
        discover_config_file_paths(accessed_since, config_file_set,
                                   exclude_regex, root_dir)

    for fpath in config_file_set:
        (_, fname) = os.path.split(fpath)
        # realpath sanitizes the path a bit, for example: '//abc/' to '/abc/'
        frelpath = os.path.realpath(fpath.replace(root_dir, root_dir_alias, 1))
        # Decode the file as utf-8, silently dropping undecodable bytes.
        with codecs.open(filename=fpath, mode='r',
                         encoding='utf-8', errors='ignore') as config_file:
            yield (frelpath, ConfigFeature(fname,
                                           config_file.read(),
                                           frelpath), 'config')


def discover_config_file_paths(accessed_since, config_file_set,
                               exclude_regex, root_dir):
    """Add config files found under root_dir to config_file_set.

    Walks the tree with os.walk (top-down), skipping every directory and
    file whose joined path matches exclude_regex at its start. A file is
    added when it exists, _is_config_file() accepts it, and its lstat
    atime or ctime is greater than accessed_since.

    :param accessed_since: timestamp compared against st_atime/st_ctime.
    :param config_file_set: set that is updated in place with the paths
        of the files found.
    :param exclude_regex: regular expression (string or compiled) applied
        with match() to each joined directory and file path.
    :param root_dir: directory at which the walk starts.
    :returns: None; results are delivered through config_file_set.
    """
    is_excluded = re.compile(exclude_regex).match
    for (root_dirpath, dirs, files) in os.walk(root_dir):
        # Prune in place so os.walk does not descend into excluded
        # directories. The entries must remain bare names: os.walk joins
        # each one onto root_dirpath itself, so storing joined paths here
        # would only work when root_dir is absolute.
        dirs[:] = [d for d in dirs
                   if not is_excluded(os.path.join(root_dirpath, d))]
        for fname in files:
            fpath = os.path.join(root_dirpath, fname)
            if is_excluded(fpath):
                continue
            if not (os.path.exists(fpath) and _is_config_file(fpath)):
                continue
            lstat = os.lstat(fpath)
            if (lstat.st_atime > accessed_since or
                    lstat.st_ctime > accessed_since):
                config_file_set.add(fpath)


def _is_config_file(fpath):
    """Return True if fpath looks like a config file, False otherwise.

    The path must be a regular file, carry one of the recognised
    extensions (compared case-sensitively) and be at most 204800 bytes.
    """
    extension = os.path.splitext(fpath)[1]
    if not os.path.isfile(fpath):
        return False
    recognised = (
        '.xml',
        '.ini',
        '.properties',
        '.conf',
        '.cnf',
        '.cfg',
        '.cf',
        '.config',
        '.allow',
        '.deny',
        '.lst',
    )
    if extension not in recognised:
        return False
    # Size is only queried once the cheaper checks have passed.
    return os.path.getsize(fpath) <= 204800