python source code of tv_grab

#!/usr/bin/env python2
# -*- coding: utf-8 -*-

# Python 3 compatibility
from __future__ import unicode_literals
# from __future__ import print_function

import re, sys, traceback, difflib, os
import time, datetime, pytz, random
import requests, httplib, socket
from DataTreeGrab import *
from tv_grab_channel import ProgramNode
from tv_grab_IO import DD_Convert
from threading import Thread, RLock, Semaphore, Event
from xml.sax import saxutils
from Queue import Queue, Empty
from copy import deepcopy, copy
try:
    import json
except ImportError:
    import simplejson as json

try:
    from html.entities import name2codepoint
except ImportError:
    from htmlentitydefs import name2codepoint

try:
    unichr(42)
except NameError:
    unichr = chr    # Python 3

class dtError(dtErrorConstants):
    def __init__(self):
        self.dtInvalidDate = 11
        self.dtShiftedDate = 12
        self.dtErrorTexts[self.dtInvalidDate] = 'Invalid Page Date!'
        self.dtErrorTexts[self.dtShiftedDate] = 'Shifted Page Date!'

dte = dtError()

class URLtypes():
    ONECHANNEL = 1
    ALLCHANNELS = 2
    CHANNELGROUP = 3
    ONEDAY = 0
    ALLDAYS = 4
    DAYGROUP = 8
    RECORDGROUP = 12

    def getsonechannel(self, url_type):
        return bool((url_type & 3) == self.ONECHANNEL)

    def getsallchannels(self, url_type):
        return bool((url_type & 3) == self.ALLCHANNELS)

    def getschannelgroup(self, url_type):
        return bool((url_type & 3) == self.CHANNELGROUP)

    def getsoneday(self, url_type):
        return bool((url_type & 12) == self.ONEDAY)

    def getsalldays(self, url_type):
        return bool((url_type & 12) == self.ALLDAYS)

    def getsdaygroup(self, url_type):
        return bool((url_type & 12) == self.DAYGROUP)

    def getsrecordgroup(self, url_type):
        return bool((url_type & 12) == self.RECORDGROUP)

# end URLtypes()

class Functions():
    """Some general Fetch functions"""

    def __init__(self, config):
        self.config = config
        self.max_fetches = Semaphore(self.config.opt_dict['max_simultaneous_fetches'])
        self.count_lock = RLock()
        self.progress_counter = 0
        self.channel_counters = {}
        self.source_counters = {}
        self.source_counters['total'] = {}
        self.raw_json = {}
        self.cache_id = self.config.cache_id
        self.json_id = self.config.json_id
        self.ttvdb1_id = self.config.ttvdb1_id
        self.ttvdb2_id = self.config.ttvdb2_id
        self.imdb3_id = self.config.imdb3_id

    # end init()

    def update_counter(self, cnt_type, source_id=None, chanid=None, cnt_add=True, cnt_change=1):
        #source_id: -99 = cache, -98 = jsondata, -11 = ttvdb
        if source_id == None:
            source_id = self.ttvdb1_id

        if not isinstance(cnt_change, int) or cnt_change == 0:
            return

        if not cnt_type in ('base', 'detail', 'empty-base', 'empty-detail', 'fail',
            'lookup', 'lookup_fail', 'queue', 'jsondata', 'failjson', 'exclude'):
            return

        if not isinstance(cnt_change, int) or cnt_change == 0:
            return

        with self.count_lock:
            if not cnt_add:
                cnt_change = -cnt_change

            if chanid != None and isinstance(chanid, (str, unicode)):
                if not chanid in self.channel_counters.keys():
                    self.channel_counters[chanid] = {}

                if not cnt_type in self.channel_counters[chanid].keys():
                    self.channel_counters[chanid][cnt_type] = {}

                if not source_id in self.channel_counters[chanid][cnt_type].keys():
                    self.channel_counters[chanid][cnt_type][source_id] = 0

                self.channel_counters[chanid][cnt_type][source_id] += cnt_change

            if not source_id in self.source_counters.keys():
                self.source_counters[source_id] = {}

            if not cnt_type in self.source_counters[source_id].keys():
                self.source_counters[source_id][cnt_type] = 0

            self.source_counters[source_id][cnt_type] += cnt_change
            if isinstance(source_id, int) and (source_id >= 0 or source_id == self.json_id):
                if cnt_type in self.source_counters['total'].keys():
                    self.source_counters['total'][cnt_type] += cnt_change

                else:
                    self.source_counters['total'][cnt_type] = cnt_change
    # end update_counter()

    def get_counter(self, cnt_type, source_id=None, chanid=None):
        if source_id == None:
            source_id = self.ttvdb1_id

        if chanid == None:
            if not source_id in self.source_counters.keys():
                return 0

            if not cnt_type in self.source_counters[source_id].keys():
                return 0

            return self.source_counters[source_id][cnt_type]

        elif not chanid in self.channel_counters.keys():
            return 0

        elif not cnt_type in self.channel_counters[chanid].keys():
            return 0

        elif not source_id in self.channel_counters[chanid][cnt_type].keys():
            return 0

        return self.channel_counters[chanid][cnt_type][source_id]
    # end get_counter()

    def get_page(self, url, *args):
        """
        Wrapper around get_page_internal to catch the
        timeout exception
        """
        try:
            data = {
            'encoding': None if len(args) < 1 else args[0],
            'txtdata': None if len(args) < 3 else args[2],
            'is_json': None if len(args) < 4 else args[3]}
            accept_header = None if len(args) < 2 else args[1]
            if isinstance(accept_header, dict):
                data['headers'] = accept_header

            elif isinstance(accept_header, (str,unicode)) and accept_header!= '':
                data['headers'] = {'Accept': accept_header}

            else:
                data['headers'] = {}

            data['headers']['Keep-Alive']  = '300'
            if not 'User-Agent'in data['headers'].keys():
                data['headers']['User-Agent'] = self.config.user_agents[random.randint(0, len(self.config.user_agents)-1)]

            cookiejar = None if len(args) < 5 else args[4]
            if isinstance(cookiejar, dict) and len(cookiejar) > 0:
                data['cookiejar'] = cookiejar

            fu = FetchURL(self.config, url, **data)
            self.max_fetches.acquire()
            fu.start()
            fu.join(self.config.opt_dict['global_timeout']+1)
            page = fu.result
            self.max_fetches.release()
            if fu.page_status == dte.dtDataOK:
                if (page == None) or (page =={}) or (isinstance(page, (str, unicode)) and \
                    ((re.sub('\n','', page) == '') or (re.sub('\n','', page) =='{}'))):
                    if self.config.write_info_files:
                        self.config.infofiles.add_url_failure('No Data: %s\n' % url)

                    return (dte.dtEmpty, None, fu.status_code)

            return (fu.page_status, page, fu.status_code)

        except(socket.timeout):
            self.config.log(self.config.text('fetch', 1, (self.config.opt_dict['global_timeout'], url)), 1, 1)
            if self.config.write_info_files:
                self.config.infofiles.add_url_failure('Fetch timeout: %s\n' % url)

            self.max_fetches.release()
            return (dte.dtTimeoutError, None, fu.status_code)
    # end get_page()

    def get_json_data(self, name, **data):
        source = data.get('source', self.json_id)
        fpath = data.get('fpath', None)
        ctype = data.get('ctype', None)
        conv_dd = DD_Convert(self.config,
                        warngoal = self.config.logging.log_queue,
                        caller_id = source)
        if source == self.json_id or self.config.test_modus:
            local_name = name

        else:
            version = data.get('version', 0)
            local_name = '%s.%s' % (name, version)

        self.raw_json[name] = ''
        # Try to find the source files locally
        if self.config.test_modus or (self.config.only_local_sourcefiles and source == self.json_id):
            try:
                if fpath != None:
                    fle = self.config.IO_func.open_file('%s/%s.json' % (fpath, name), 'r', 'utf-8')
                    if fle != None:
                        data = json.load(fle)
                        if source != self.json_id:
                            conv_dd.convert_sourcefile(data, ctype)
                            return conv_dd.csource_data

                        return data

            except(ValueError) as e:
                self.config.log('  JSON error: %s\n' % e)

            except:
                self.config.log(traceback.print_exc())

        elif source != self.json_id:
            # We try to get the converted pickle in the supplied location,
            # but check that it is of the right dt version and date
            try:
                if fpath != None:
                    fn = '%s/%s.bin' % (fpath, local_name)
                    if os.path.isfile(fn) and datetime.date.fromtimestamp(os.stat(fn).st_mtime) >= self.config.dtdate:
                        fle = self.config.IO_func.read_pickle(fn)
                        if fle != None and data_value(["dtversion"], fle, tuple) == conv_dd.dtversion() \
                            and data_value(["tvgversion"], fle, tuple, None) == \
                                tuple(self.config.version(False, True)[1:5]):
                            return fle

            except:
                self.config.log(traceback.print_exc())

        # We try to download unless the only_local_sourcefiles flag is set
        if not self.config.only_local_sourcefiles:
            try:
                headers = {'Keep-Alive' : '300',
                              'User-Agent' : self.config.user_agents[random.randint(0, len(self.config.user_agents)-1)] }

                url = '%s/%s.json' % (data.get('url', self.config.api_source_url), name)
                self.config.log(self.config.text('fetch', 1,(name, ), 'other'), 1)
                fu = FetchURL(self.config, url,
                        headers = headers,
                        enoding = 'utf-8',
                        is_json = True)
                self.max_fetches.acquire()
                self.update_counter('jsondata', source)
                fu.start()
                fu.join(self.config.opt_dict['global_timeout']+1)
                page = fu.result
                self.max_fetches.release()
                if (page == None) or (page =={}) or \
                    (isinstance(page, (str, unicode)) and \
                        ((re.sub('\n','', page) == '') or (re.sub('\n','', page) =='{}'))):
                    self.update_counter('failjson', source)
                    if source != self.json_id:
                        return None

                else:
                    self.raw_json[name] = fu.url_text
                    if source == self.json_id:
                        return page

                    for v in range(1, version+1):
                        self.config.IO_func.remove_file('%s/%s.%s.json' % (fpath, name, v))
                        self.config.IO_func.remove_file('%s/%s.%s.bin' % (fpath, name, v))

                    conv_dd.convert_sourcefile(page, ctype, '%s/%s.bin' % (fpath, local_name))
                    return conv_dd.csource_data

            except:
                if source != self.json_id:
                    return None

        # And for the two mainfiles we try to fall back to the library location
        if source == self.json_id:
            try:
                fle = self.config.IO_func.open_file('%s/%s.json' % (self.config.source_dir, name), 'r', 'utf-8')
                if fle != None:
                    return json.load(fle)

            except(ValueError) as e:
                self.config.log('  JSON error: %s\n' % e)

            except:
                return None

    # end get_json_data()

    def remove_accents(self, name):
        name = re.sub('á','a', name)
        name = re.sub('é','e', name)
        name = re.sub('í','i', name)
        name = re.sub('ó','o', name)
        name = re.sub('ú','u', name)
        name = re.sub('ý','y', name)
        name = re.sub('à','a', name)
        name = re.sub('è','e', name)
        name = re.sub('ì','i', name)
        name = re.sub('ò','o', name)
        name = re.sub('ù','u', name)
        name = re.sub('ä','a', name)
        name = re.sub('ë','e', name)
        name = re.sub('ï','i', name)
        name = re.sub('ö','o', name)
        name = re.sub('ü','u', name)
        name = re.sub('ÿ','y', name)
        name = re.sub('â','a', name)
        name = re.sub('ê','e', name)
        name = re.sub('î','i', name)
        name = re.sub('ô','o', name)
        name = re.sub('û','u', name)
        name = re.sub('ã','a', name)
        name = re.sub('õ','o', name)
        name = re.sub('@','a', name)
        return name
    # end remove_accents()

    def get_offset(self, date):
        """Return the offset from today"""
        cd = self.config.in_fetch_tz(datetime.datetime.now(pytz.utc))
        rd = self.config.in_fetch_tz(date)
        return int(rd.toordinal() -  cd.toordinal())
    # end get_offset()

    def get_fetchdate(self, date):
        """Return the date from today"""
        cd = self.config.in_fetch_tz(datetime.datetime.now(pytz.utc))
        rd = self.config.in_fetch_tz(date)
        return rd.date()
    # end get_fetchdate()

    def print_time(self, time):
        if isinstance(time, datetime.datetime):
            return time.strftime('%d %b %H:%M %Z')
    # end print_time()

    def sleep(self):
        time.sleep(random.randint(self.config.opt_dict['nice_time'][0], self.config.opt_dict['nice_time'][1]))
    # end sleep()

    def merge_date_time(self, date_ordinal, date_time, tzinfo = None, as_utc = True):
        if tzinfo == None:
            tzinfo = self.config.utc_tz

        try:
            rtime = datetime.datetime.combine(datetime.date.fromordinal(date_ordinal), date_time)
            rtime = tzinfo.localize(rtime)
            if as_utc:
                rtime = self.config.in_utc(rtime)

            return rtime

        except:
            return None
    # end merge_date_time()
# end Functions()

class FetchURL(Thread):
    """
    A simple thread to fetch a url with a timeout
    """
    def __init__ (self, config, url, **kwargs):
        Thread.__init__(self, name = 'fetching')
        self.thread_type = 'fetching'
        self.state = 0
        self.config = config
        self.func = self.config.fetch_func
        self.url = url
        self.url_data = {
            'params': kwargs.get('txtdata', None),
            'headers': kwargs.get('headers', None),
            'timeout': self.config.opt_dict['global_timeout']/2,
            'stream': True}
        cookiejar = kwargs.get('cookiejar', None)
        if isinstance(cookiejar, dict) and len(cookiejar) > 0:
            self.url_data['cookies'] = cookiejar

        self.encoding = kwargs.get('encoding',None )
        self.is_json = kwargs.get('is_json', False)
        self.raw = ''
        self.result = None
        self.page_status = dte.dtDataOK
        self.url_request = None
        self.status_code = None

    def run(self):
        self.result = self.get_page_internal()

    def find_html_encoding(self):
        # look for the text '<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />'
        # in the first 600 bytes of the HTTP page
        m = re.search(r'<meta[^>]+\bcharset=["\']?([A-Za-z0-9\-]+)\b',
            self.raw[:512].decode('ascii', 'ignore'))
        if m:
            return m.group(1)

    def get_page_internal(self):
        """
        Retrieves the url and returns a string with the contents.
        Optionally, returns None if processing takes longer than
        the specified number of timeout seconds.
        """
        try:
            self.url_request = requests.get(self.url, **self.url_data)
            self.status_code = self.url_request.status_code
            if self.url_request.status_code != requests.codes.ok:
                if self.status_code == 500 and len(self.url_request.text) > 0 and \
                    not self.url_request.text.strip()[0] in ("{", "["):
                    # This probably is an inclomplete read we possibly can fix
                    self.page_status = dte.dtIncompleteRead

                else:
                    self.url_request.raise_for_status()

            encoding = self.find_html_encoding()
            if encoding != None:
                self.url_request.encoding = encoding

            elif self.encoding != None:
                self.url_request.encoding = self.encoding

            self.raw = self.url_request.content
            self.url_text = self.url_request.text

            if ('content-type' in self.url_request.headers and \
                'json' in self.url_request.headers['content-type']) or self.is_json:
                try:
                    return self.url_request.json()

                except(ValueError) as e:
                    self.config.log(self.config.text('fetch', 5, (self.url, e)), 1, 1)
                    self.page_status = dte.dtJSONerror
                    if self.config.write_info_files:
                        self.config.infofiles.add_url_failure('JSONError: %s\n' % self.url)
                        self.config.infofiles.write_raw_string(self.url_text)

                return None

            else:
                return self.url_text

        except (requests.ConnectionError):
            self.config.log(self.config.text('fetch', 3, (self.url, )), 1, 1)
            self.page_status = dte.dtURLerror
            if self.config.write_info_files:
                self.config.infofiles.add_url_failure('URLError: %s\n' % self.url)

        except (requests.HTTPError) as e:
            self.config.log(self.config.text('fetch', 4, (self.url, '%s: %s' \
                % (e.response.status_code, e.response.reason))), 1, 1)
            self.page_status = dte.dtHTTPerror
            if self.config.write_info_files:
                self.config.infofiles.add_url_failure('HTTPError %s: %s: %s\n' \
                    % (e.response.status_code, e.response.reason, self.url))

        except (requests.Timeout):
            self.config.log(self.config.text('fetch', 1, (self.config.opt_dict['global_timeout'], self.url)), 1, 1)
            self.page_status = dte.dtTimeoutError
            if self.config.write_info_files:
                self.config.infofiles.add_url_failure('Fetch timeout: %s\n' % self.url)

        except:
            self.config.log(self.config.text('fetch', 2,  (sys.exc_info()[0], sys.exc_info()[1], self.url)), 0)
            self.page_status = dte.dtUnknownError
            if self.config.write_info_files:
                self.config.infofiles.add_url_failure('%s,%s:\n  %s\n' % (sys.exc_info()[0], sys.exc_info()[1], self.url))

# end FetchURL

class DataTree(DataTreeShell, Thread):
    def __init__(self, source, data_def, warnaction = "default", caller_id = 0):
        Thread.__init__(self, name = 'DataTree')
        self.thread_type = 'DataTree'
        self.source = source
        self.export = False
        self.rundata = {}
        self.state = 0
        self.config = self.source.config
        self.fetch_string_parts = re.compile("(.*?[.?!:]+ |.*?\Z)")
        DataTreeShell.__init__(self, data_def,
                        warnaction = warnaction,
                        warngoal = self.config.logging.log_queue,
                        caller_id = caller_id)
        self.print_tags = source.print_tags
        self.show_result = source.show_parsing
        self.fle = source.test_output
        self.simplefilter("error", category = dtDataWarning, severity = 1)
        sys.modules['DataTreeGrab']._warnings.filterwarnings('ignore', category = dtLinkWarning, \
            message = 'Regex "\\\d\*/\?\(\\\d\*\)\.\*" in: .*?', caller_id = caller_id)
        sys.modules['DataTreeGrab']._warnings.filterwarnings('ignore', category = dtLinkWarning, \
            message = 'Regex "\(\\\d\*\)/\.\*" in: .*?', caller_id = caller_id)

    def run(self):
        try:
            if data_value('task',self.rundata, str) == 'epdata':
                tid = data_value('tid',self.rundata, int, 0)
                queryid = data_value('queryid',self.rundata, int, -1)
                self.searchtree.show_progress = True
                if self.extract_datalist():
                    if self.check_errorcode() in (dte.dtStartNodeInvalid, dte.dtNoData):
                        self.config.log(self.config.text('ttvdb', 16, ('%s: %s' % (tid, self.rundata['name']), )))

                    else:
                        self.config.log(self.config.text('fetch', 13, (self.check_errorcode(text_values = True)[0], )))

                    self.source.detail_request.put({'task': 'fail_ep_info', 'queryid': queryid})

                if self.export:
                    chanid = data_value('chanid',self.rundata, str)
                    if len(self.result) == 0:
                        self.source.functions.update_counter('fail', self.config.ttvdb1_id, chanid)
                        self.source.detail_request.put({'task': 'fail_ep_info', 'tid': tid, 'queryid': queryid})

                    else:
                        eps = self.source.process_data(self.result, tid, 'en')
                        self.init_data_def(self.source.data_value("seriesname", dict))
                        if not self.extract_datalist():
                            self.source.store_data('ttvdbid', data=self.result)

                        epc = []
                        for k, v in eps[1].items():
                            epc.append({'tid': tid,'sid': k, 'count': v})

                        self.source.store_data('episodes', data=eps[0])
                        self.source.store_data('epcount', data=epc,
                            confirm={'task': 'process_ep_info', 'queryid': queryid})

        except dtWarning as e:
            self.config.log(self.config.text('fetch', 13, (e.message, )))
            self.source.detail_request.put({'task': 'fail_ep_info', 'queryid': queryid})

    def get_string_parts(self, sstring, header_items = None):
        if not isinstance(header_items, (list, tuple)):
            header_items = []

        test_items = []
        for hi in header_items:
            if isinstance(hi, (str, unicode)):
                test_items.append((hi.lower(), hi))

            elif isinstance(hi, (list, tuple)):
                if len(hi) > 0 and isinstance(hi[0], (str, unicode)):
                    hi0 = hi[0].lower()
                    if len(hi) > 1 and isinstance(hi[1], (str, unicode)):
                        hi1 = hi[1]

                    else:
                        hi1 = hi[0]

                    test_items.append((hi0, hi1))

        string_parts = self.fetch_string_parts.findall(sstring)
        string_items = {}
        act_item = 'start'
        string_items[act_item] = []
        for dp in string_parts:
            if dp.strip() == '':
                continue

            if dp.strip()[-1] == ':':
                act_item = dp.strip()[0:-1].lower()
                string_items[act_item] = []

            else:
                for ti in test_items:
                    if dp.strip().lower()[0:len(ti[0])] == ti[0]:
                        act_item = ti[1]
                        string_items[act_item] = []
                        string_items[act_item].append(dp[len(ti[0]):].strip())
                        break

                else:
                    string_items[act_item].append(dp.strip())

        return string_items

    def add_on_link_functions(self, fid, data = None, default = None):
        def link_warning(text, severity=4):
            self.warn('%s on function: "%s"\n   Using link_data: %s' %
                (text, fid, data), dtLinkWarning, severity, 3)

        def split_kommastring(dstring):

            return re.sub('\) ([A-Z])', '), \g<1>', \
                re.sub(self.config.language_texts['and'], ', ', \
                re.sub(self.config.language_texts['and others'], '', dstring))).split(',')

        def add_person(prole, pname, palias = None):
            if pname in ('', None):
                return

            if pname[-1] in '\.,:;-':
                pname = pname[:-1].strip()

            if not prole in credits:
                credits[prole] = []

            if prole in ('actor', 'guest'):
                p = {'name': pname, 'role': palias}
                credits[prole].append(p)

            else:
                credits[prole].append(pname)

        try:
            # split logo name and logo provider
            if fid == 101:
                if is_data_value(0, data, str):
                    d = data[0].split('?')[0]
                    for k, v in self.config.xml_output.logo_provider.items():
                        if d[0:len(v)] == v:
                            return (d[len(v):], k)

                return ('',-1)

            # Extract roles from a set of lists or named dicts
            if fid == 102:
                credits = {}
                if len(data) == 0:
                    return default

                # It's a single list of dicts created through the "name" keyword
                if is_data_value(0, data, list) and (len(data) == 1 or is_data_value(1, data, int)):
                    modus = data_value(1, data, int, 0)
                    for item in data[0]:
                        if not isinstance(item, dict):
                            continue

                        for k, v in item.items():
                            if k.lower() in self.config.roletrans.keys():
                                role = self.config.roletrans[k.lower()]
                                if modus == 1:
                                    for pp in v:
                                        cn = re.search('(.*?)\((.*?)\)',pp)
                                        if cn:
                                            add_person(role, cn.group(1).strip(), cn.group(2).strip())

                                        else:
                                            add_person(role, pp.strip())

                                else:
                                    for pp in v:
                                        pp = pp.split(',')
                                        for p in pp:
                                            cn = p.split('(')
                                            if len(cn) > 1:
                                                add_person(role, cn[0].strip(), cn[1].split(')')[0].strip())

                                            else:
                                                add_person(role, cn[0].strip())

                    return credits

                if len(data) < 2:
                    return default

                # data[0] is a list of list of names
                # data[1] is a list of roles
                # data[2] is an optional list of characters matching data[0]
                if is_data_value(0, data, list) and is_data_value(1, data, list):
                    for item in range(len(data[0])):
                        if item >= len(data[1]):
                            continue

                        if data[1][item].lower() in self.config.roletrans.keys():
                            role = self.config.roletrans[data[1][item].lower()]
                            if isinstance(data[0][item], (str, unicode)):
                                cast = split_kommastring(data[0][item])

                            else:
                                cast = data[0][item]

                            if isinstance(cast, (list, tuple)):
                                for person in cast:
                                    if len(data) > 2 and isinstance(data[2],(list, tuple)) and len(data[2]) > item:
                                        add_person(role, person.strip(), data[2][item])

                                    else:
                                        add_person(role, person.strip())

                # The same but with a single role
                elif isinstance(data[1], (str,unicode)) and data[1].lower() in self.config.roletrans.keys():
                    role = self.config.roletrans[data[1].lower()]

                    if isinstance(data[0], (str, unicode)):
                        cast = split_kommastring(data[0])

                    else:
                        cast = data[0]

                    if isinstance(cast, (list, tuple)):
                        for item in range(len(cast)):
                            if len(data) > 2 and isinstance(data[2],(list, tuple)) and len(data[2]) > item:
                                add_person(role, cast[item].strip(), data[2][item])

                            else:
                                add_person(role, cast[item].strip())

                return credits

            # Extract roles from a string
            if fid == 103:
                if len(data) == 0 or data[0] == None:
                    return {}

                if isinstance(data[0], (str, unicode)) and len(data[0]) > 0:
                    tstr = unicode(data[0])
                elif isinstance(data[0], list) and len(data[0]) > 0:
                    tstr = unicode(data[0][0])
                    for index in range(1, len(data[0])):
                        tstr = u'%s %s' % (tstr, unicode(data[0][index]))
                else:
                    return {}

                if len(data) == 1:
                    cast_items = self.get_string_parts(tstr)

                else:
                    cast_items = self.get_string_parts(tstr, data[1])

                credits = {}
                for crole, cast in cast_items.items():
                    if len(cast) == 0:
                        continue

                    elif crole.lower() in self.config.roletrans.keys():
                        role = self.config.roletrans[crole.lower()]
                        cast = split_kommastring(cast[0])

                        for cn in cast:
                            cn = cn.split('(')
                            if len(cn[0].strip().split(' ')) > 4:
                                continue

                            if len(cn) > 1:
                                add_person(role, cn[0].strip(), cn[1].split(')')[0].strip())

                            else:
                                add_person(role, cn[0].strip())

                return credits

            # Process a rating item
            if fid == 104:
                rlist = []
                if is_data_value(0, data, str):
                    # We treat a string as a list of items with a maximum length
                    if data_value(1, data, str) == 'as_list':
                        item_length = data_value(2, data, int, 1)
                        unique_added = False
                        for index in range(len(data[0])):
                            code = None
                            for cl in range(item_length):
                                if index + cl >= len(data[0]):
                                    continue

                                tval = data[0][index: index + cl + 1]
                                if tval in self.source.source_data['rating'].keys():
                                    code = self.source.source_data['rating'][tval]
                                    break

                            if code != None:
                                if code in self.config.rating["unique_codes"].keys():
                                    if unique_added:
                                        continue

                                    rlist.append(code)
                                    unique_added = True

                                elif self.source.source_data['rating'][code] in self.config.rating["addon_codes"].keys():
                                    rlist.append(code)

                            elif self.config.write_info_files:
                                self.config.infofiles.addto_detail_list(u'new %s rating => %s' % (self.source.source, code))

                    else:
                        if data[0].lower() in self.source.source_data['rating'].keys():
                            v = self.source.source_data['rating'][data[0].lower()]
                            if v in self.config.rating["unique_codes"].keys():
                                rlist.append(v)

                            elif v in self.config.rating["addon_codes"].keys():
                                rlist.append(v)

                        elif self.config.write_info_files:
                            self.config.infofiles.addto_detail_list(u'new %s rating => %s' % (self.source.source, data[0]))

                elif is_data_value(0, data, list):
                    unique_added = False
                    for item in data[0]:
                        if item.lower() in self.source.source_data['rating'].keys():
                            v = self.source.source_data['rating'][item.lower()]
                            if v in self.config.rating["unique_codes"].keys():
                                if unique_added:
                                    continue

                                rlist.append(v)
                                unique_added = True

                            elif v in self.config.rating["addon_codes"].keys():
                                rlist.append(v)

                        elif self.config.write_info_files:
                            self.config.infofiles.addto_detail_list(u'new %s rating => %s' % (self.source.source, data[0]))

                return rlist

            # Check the text in data[1] for the presence of keywords to determine genre
            if fid == 105:
                if len(data) >= 2 and isinstance(data[0], dict):
                    for k, v in data[0].items():
                        kl = k.lower().strip()
                        for i in range(1, len(data)):
                            if isinstance(data[i], (str, unicode)) and kl in data[i].lower().strip():
                                return v

                return default

            # split a genre code in a generic part of known length and a specific part
            if fid == 106:
                if len(data) == 0 or not isinstance(data[0],(str, unicode, list)):
                    return []

                if len(data) == 1:
                    if isinstance(data[0], list):
                        return data[0]

                    else:
                        return [data[0]]


                if isinstance(data[0], list):
                    if len(data[0]) == 0:
                        return []

                    data[0] = data[0][0]

                if not isinstance(data[1], int) or len(data[0]) <= data[1]:
                    return [data[0]]

                return [data[0][:data[1]], data[0][data[1]:]]

            # Return unlisted values to infofiles in a fid 11 dict
            if fid in (107, 201):
                if len(data) < 2 or not isinstance(data[0], (list, tuple)):
                    return default

                if not isinstance(data[1], (list,tuple)):
                    data[1] = [data[1]]

                for index in range(len(data[1])):
                    data[1][index] = data[1][index].lower().strip()

                for sitem in data[0]:
                    for k, v in sitem.items():
                        if k.lower().strip() in data[1]:
                            continue

                        if k.lower().strip() in self.config.roletrans.keys():
                            continue

                        if self.config.write_info_files:
                            self.config.infofiles.addto_detail_list(u'new %s dataitem %s => %s' %
                                (self.source.source, k, v))

            # Return unlisted values to infofiles in a fid 10 list set
            if fid in (108, 202):
                if not self.config.write_info_files:
                    return

                if len(data) < 3 or not isinstance(data[0], (list,tuple)) or \
                    not isinstance(data[1], (list,tuple)) or not isinstance(data[2], (list,tuple)):
                    return

                for index in range(len(data[2])):
                    data[2][index] = data[2][index].lower().strip()

                for index in range(len(data[0])):
                    data[0][index] = data[0][index].lower().strip()

                for index in range(len(data[0])):
                    if data[0][index].lower() in data[2]:
                        continue

                    if data[0][index].lower() in self.config.roletrans.keys():
                        continue

                    if index >= len(data[1]):
                        self.config.infofiles.addto_detail_list(u'new %s dataitem %s' %
                            (self.source.source, data[0][index]))

                    else:
                        self.config.infofiles.addto_detail_list(u'new %s dataitem %s => %s' %
                            (self.source.source, data[0][index], data[1][index]))

            # strip data[1] from the start of data[0] if present and make sure it's unicode
            elif fid == 109:
                return {"fid": 1}

        except:
            self.config.log([self.config.text('fetch', 11, ('link', fid, self.source.source)),
                traceback.format_exc()], 1)
            return default

# end DataTree()

class theTVDB_v1(Thread):
    def __init__(self, config, source_data):
        Thread.__init__(self, name = 'source-thetvdb')
        self.config = config
        self.functions = self.config.fetch_func
        self.proc_id = self.config.ttvdb1_id
        self.quit = False
        self.ready = False
        self.state = 0
        self.active = True
        self.lastrequest = None
        self.lastquery = None
        self.pending_tids = {}
        self.queried_titles = {}
        self.api_key = "0BB856A59C51D607"
        self.source_lock = RLock()
        # The queue to receive answers on database queries
        self.cache_return = Queue()
        # The queue to receive requests for detail fetches
        self.detail_request = Queue()
        self.config.queues['ttvdb'] = self.detail_request
        self.thread_type = 'lookup'
        self.test_output = sys.stdout
        self.print_tags = False
        self.print_searchtree = False
        self.show_parsing = False
        self.print_roottree = False
        self.roottree_output = self.test_output
        self.show_result = False
        self.config.threads.append(self)
        self.local_encoding = self.config.logging.local_encoding
        self.show_progres = False
        self.lookup_log = []
        try:
            self.source_data = source_data
            self.source = self.source_data['name']
            self.lang_list = self.source_data['lang-list']
            self.detail_keys = {}
            self.detail_keys['series'] = list(self.source_data["last_updated"]["values"].keys())
            self.detail_keys['episodes'] = list(self.source_data["episodes"]["values"].keys())
            self.config.detail_keys['ttvdb'] = self.detail_keys['series']
            self.config.detail_keys['episodes'] = self.detail_keys['episodes']
            self.site_tz = self.source_data["site-tz"]
            self.datatrees = {}
            self.episodetrees = {}
            self.queryid = 0
            for ptype in ("seriesid", "last_updated", "episodes"):
                self.datatrees[ptype] = DataTree(self, self.data_value(ptype, dict), 'always', self.proc_id)

        except:
            self.config.opt_dict['disable_ttvdb'] = True
            traceback.print_exc()

    def run(self):
        if self.config.opt_dict['disable_ttvdb']:
            return

        pending_requests = {}
        def make_se(data):
            return '(s%se%s) %r:%r' % (unicode(data['season']).rjust(2, '0'), \
                            unicode(data['episode']).rjust(2, '0'), \
                            data['stitle'], \
                            data['episode title'])

        try:
            self.state = 4
            while True:
                if self.quit and self.detail_request.empty():
                    self.state = 0
                    break

                try:
                    crequest = self.detail_request.get(True, 5)
                    self.lastrequest = self.config.in_output_tz('now')
                    if self.quit:
                        if 'parent' in crequest:
                            crequest['parent'].detail_return.put('quit')

                        continue

                except Empty:
                    continue

                if (not isinstance(crequest, dict)) or (not 'task' in crequest):
                    continue

                if crequest['task'] == 'request_ep_info':
                    if not 'parent' in crequest:
                        continue

                    parent = crequest['parent']
                    if 'pn' in crequest:
                        pn = crequest['pn']
                        self.state = 3
                        qanswer = self.get_season_episode(parent, pn)
                        if not is_data_value('state', qanswer, int):
                            qanswer = {'state': 0, 'data': None}

                        if qanswer['state'] == -1:
                            parent.detail_return.put('quit')
                            self.quit = True
                            self.state = 4
                            continue

                        elif qanswer['state'] in (0, 1):
                            # Failed / Finished
                            d = qanswer['data']
                            parent.detail_return.put({'source': self.proc_id, 'data': d, 'pn': pn})
                            self.functions.update_counter('queue', self.proc_id,  parent.chanid, False)
                            if qanswer['state'] == 1:
                                self.functions.update_counter('lookup', self.proc_id, parent.chanid)
                                self.lookup_log.append('lookup %s(%s) <= %s: %s %s\n' % \
                                    (d['ttvdbid'], make_se(d), parent.chan_name,
                                        pn.get_start_stop(True, True), pn.get_title()))

                            else:
                                self.functions.update_counter('lookup_fail', self.proc_id, parent.chanid)
                                self.lookup_log.append('failed %s <= %s: %s %s\n' % \
                                    (qanswer['tid'], parent.chan_name, pn.get_start_stop(True, True), pn.get_title()))

                        elif qanswer['state'] == 2:
                            # Answer is pending
                            queryid = qanswer['queryid']
                            tid = qanswer['tid']
                            chanid = parent.chanid
                            self.pending_tids[tid] = queryid
                            pending_requests[queryid] = {'tid': tid, 'requests': [{'pn': pn, 'parent': parent}]}

                        elif qanswer['state'] == 3:
                            # There is a request for this tid pending
                            #{'state': 3, 'tid': tid}
                            queryid = self.pending_tids[qanswer['tid']]
                            pending_requests[queryid]['requests'].append({'pn': pn, 'parent': parent})
                    self.state = 4
                    continue

                if crequest['task'] == 'process_ep_info':
                    queryid = crequest['queryid']

                    if queryid in pending_requests:
                        tid = pending_requests[queryid]['tid']
                        prequests = pending_requests[queryid]['requests']
                        self.state = 3
                        for r in prequests:
                            parent = r['parent']
                            pn = r['pn']
                            qanswer = self.get_season_episode(parent, pn, tid)
                            if not is_data_value('state', qanswer, int):
                                qanswer = {'state': 0, 'data': None}

                            if qanswer['state'] == -1:
                                parent.detail_return.put('quit')
                                self.quit = True
                                self.state = 4
                                continue

                            d = qanswer['data']
                            parent.detail_return.put({'source': self.proc_id, 'data': d, 'pn': pn})
                            self.functions.update_counter('queue', self.proc_id,  parent.chanid, False)
                            if qanswer['state'] == 1:
                                self.functions.update_counter('lookup', self.proc_id, parent.chanid)
                                self.lookup_log.append('lookup %s(%s) <= %s: %s %s\n' % \
                                    (d['ttvdbid'], make_se(d), parent.chan_name,
                                        pn.get_start_stop(True, True), pn.get_title()))

                            else:
                                self.functions.update_counter('lookup_fail', self.proc_id, parent.chanid)
                                self.lookup_log.append('failed %s <= %s: %s %s\n' % \
                                    (qanswer['tid'], parent.chan_name,
                                        pn.get_start_stop(True, True), pn.get_title()))

                        del self.pending_tids[tid]
                        del pending_requests[queryid]
                        self.state = 4
                    continue

                if crequest['task'] == 'fail_ep_info':
                    queryid = crequest['queryid']

                    if queryid in pending_requests:
                        tid = pending_requests[queryid]['tid']
                        prequests = pending_requests[queryid]['requests']
                        for r in prequests:
                            parent = r['parent']
                            pn = r['pn']
                            parent.detail_return.put({'source': self.proc_id, 'data': None, 'pn': pn})
                            self.functions.update_counter('queue', self.proc_id,  parent.chanid, False)
                            self.functions.update_counter('lookup_fail', self.proc_id, parent.chanid)
                            self.lookup_log.append('failed %s <= %s: %s %s\n' % \
                                (tid, parent.chan_name, pn.get_start_stop(True, True), pn.get_title()))

                        del self.pending_tids[tid]
                        del pending_requests[queryid]
                    continue

                if crequest['task'] == 'quit':
                    self.quit = True
                    for dt in self.datatrees.values():
                        try:
                            dt.searchtree.quit = True

                        except:
                            continue

                    for dt in self.episodetrees.values():
                        try:
                            dt.searchtree.quit = True

                        except:
                            continue

                    continue

            if self.config.ttvdb_log_output != None:
                self.lookup_log.sort()
                for line in self.lookup_log:
                    self.config.ttvdb_log_output.write(line)

                self.config.ttvdb_log_output.close()
                self.config.ttvdb_log_output = None

        except:
            self.config.queues['log'].put({'fatal': [traceback.format_exc(), '\n'], 'name': 'theTVDB'})
            self.ready = True
            self.state = 0
            return(98)

    def query_ttvdb(self, ptype, pdata, chanid = None, background = False):
        '''
        Make a request on theTVDB.com and return the queryID or any return data from DataTree
        '''
        if self.lastquery != None and \
            self.config.in_output_tz('now') - self.lastquery < datetime.timedelta(seconds = 1):
            self.functions.sleep()

        self.lastquery = self.config.in_output_tz('now')
        if not ptype in self.datatrees.keys() or not isinstance(pdata, dict) or self.config.opt_dict['only_cache']:
            return

        # A request must either contain a name or an ID
        if not ('ttvdbid' in pdata or 'name' in pdata):
            return

        if is_data_value(['ttvdbid'], pdata, (int, str)):
            pdata['ttvdbid'] = unicode(pdata['ttvdbid'])

        if is_data_value(['name'], pdata, (int, str)):
            pdata['name'] = unicode(pdata['name'])

        pdata['api-key'] = self.api_key
        # A language must be valid
        if (not pdata['lang'] in self.lang_list) and not (pdata['lang'] == 'all' and ptype == 'seriesid'):
            pdata['lang'] = 'en'

        # Do we start an independent DataTree thread?
        if background:
            self.queryid += 1
            queryid = self.queryid
            self.episodetrees[queryid] = DataTree(self, self.data_value(ptype, dict), 'always', self.proc_id)
            dtree = self.episodetrees[queryid]
            dtree.rundata = {'task':'epdata',
                                        'queryid': queryid,
                                        'tid': int(pdata['ttvdbid']),
                                        'lang': pdata['lang'],
                                        'name': pdata['name'],
                                        'chanid': chanid}

        else:
            dtree = self.datatrees[ptype]

        # Get the page
        url = dtree.get_url(pdata, False)
        if self.print_searchtree:
            print '(url, encoding, accept_header, url_data, is_json)'
            print url

        self.functions.update_counter('detail', self.proc_id, chanid)
        pstate, page, pcode = self.functions.get_page(*url)
        if pstate != dte.dtDataOK or page == None:
            self.functions.update_counter('fail', self.proc_id, chanid)
            return None

        try:
            if dtree.init_data(page):
                return None

            if background:
                dtree.start()
                return queryid

            if dtree.extract_datalist():
                self.functions.update_counter('fail', self.proc_id, chanid)
                # It failed
                return None

            data = copy(dtree.result)
            if len(data) == 0:
                self.functions.update_counter('fail', self.proc_id, chanid)
                return None

            # we extract the main series data
            if ptype == 'episodes':
                dtree.init_data_def(self.data_value("seriesname", dict))
                if not dtree.extract_datalist():
                    self.store_data('ttvdbid', data=dtree.result)

                dtree.init_data_def(self.data_value("episodes", dict))

            return data

        except dtWarning as e:
            self.functions.update_counter('fail', self.proc_id, chanid)
            self.config.log(self.config.text('fetch', 14, (e.message, ptype, 'theTVDB.com')))
            return None

    def process_data(self, data, tid, lang):
        eps = []
        abs_cnt = 0
        pre_eps = []
        pre_cnt = 0
        sep_cnt = {}
        data.sort(key=lambda p: (p['sid'], p['eid']))
        for ep in data:
            if not isinstance(ep, dict):
                continue

            sid = data_value('sid', ep, int, -1)
            eid = data_value('eid', ep, int, -1)
            abseid = data_value('abseid', ep, int, -1)
            tepid = data_value('tepid', ep, int, -1)
            if sid == -1 or eid == -1:
                continue

            if sid in sep_cnt.keys():
                sep_cnt[sid] += 1

            else:
                sep_cnt[sid] = 1

            title = data_value('episode title', ep, str, 'Episode %s' % eid)
            desc = data_value('description', ep, str, None)
            airdate = data_value('airdate', ep, datetime.date, None)
            rating = data_value('star-rating', ep, float, None)
            writer = data_value('writer', ep, list)
            guest = data_value('guest', ep, list)
            actor = data_value('actor', ep, list)
            director = data_value('director', ep, list)
            edata = {'tid': int(tid),
                    'sid': int(sid),
                    'eid': int(eid),
                    'abseid': int(abseid),
                    'tepid': int(tepid),
                    'episode title': title,
                    'airdate': airdate,
                    'writer': writer,
                    'guest': guest,
                    'director': director,
                    'lang': lang,
                    'star-rating': rating,
                    'description': desc}

            eps.append(edata)

        return [eps, sep_cnt]

    def store_data(self, task, **data):
        '''
        Store any fetched data in the Database
        Optionally ask the database to confirm storing the data
        '''
        if isinstance(data, list) and len(data) == 0:
            return

        if task == 'ttvdbid':
            dbdata = {'task':'add', 'ttvdb': data['data']}

        elif task == 'alias':
            dbdata = {'task':'add', 'ttvdb_alias': data}

        elif task == 'episodes':
            dbdata = {'task':'add', 'episodes': data['data']}

        elif task == 'epcount':
            dbdata = {'task':'add', 'epcount': data['data']}

        elif task == 'delete ttvdbid':
            dbdata ={'task':'delete', 'ttvdb': data}

        else:
            return


        confirm = data.get('confirm', None)
        if confirm != None:
            dbdata['queue'] = self.detail_request
            dbdata['confirm'] = confirm

        self.config.queues['cache'].put(dbdata)

    def get_cache_return(self, task, **data):
        '''
        Wait for any returned data from the database
        If task is set perform the query
        '''
        if self.quit:
            return -1

        dbdata = {'parent': self, 'task':'query', task: data}
        self.config.queues['cache'].put(dbdata)
        self.state += 8
        value = self.cache_return.get(True)
        self.state -= 8
        if value == 'quit':
            self.ready = True
            return -1

        return value

    def get_ttvdb_id(self, name, lang='en', chanid=None):
        '''
        Search the database and/or theTVDB for an ID
        If it is not found on theTVDB store it to not check again for a 30 days
        If it is found on the TVDB retrieve and store the episode data
        Check self.pending_tids and self.queried_titles for pending searches
        '''
        def get_tid(idsource = 'from db'):
            if idsource == 'from db':
                data = self.get_cache_return('ttvdb_alias', alias=name)

            elif idsource == 'from ttvdb':
                data = self.query_ttvdb('seriesid', {'name': series_name, 'lang': lang}, chanid)

            else:
                return (0, None)

            if data == -1:
                # A quit request
                return (-1, None)

            elif not isinstance(data, list) or len(data) == 0:
                # Nothing found
                return (0, None)

            if len(data) == 1:
                return (data_value([0, 'tid'], data, int, 0), \
                            data_value([0, 'tdate'], data, datetime.date))

            tids = {}
            tidcnt = 0
            tindex = -1
            # Return the first unless another is more frequent
            for index in range(len(data)):
                rtid = data_value([index, 'tid'], data, int, 0)
                if rtid == 0:
                    continue

                if not rtid in tids.keys():
                    tids[rtid] = 1

                else:
                    tids[rtid] += 1

                if tids[rtid] > tidcnt:
                    tidcnt =  tids[rtid]
                    tindex = index

            if tindex > -1:
                rtid = data_value([tindex, 'tid'], data, int, 0)
                rtdate = data_value([tindex, 'tdate'], data, datetime.date)

                self.queried_titles[name.lower()] = rtid
                return (rtid, rtdate)

            return (0, None)

        def check_alias():
            alias = self.get_cache_return('ttvdb_alias', alias=name)
            if alias == -1:
                return -1

            if alias == None or len(alias) == 0:
                return {'tid':0, 'tdate': None, 'name': name, 'lang': None}

            return alias[0]

        tid = 0
        last_updated = None
        new_fetch = True
        #First check if a request has been done or is pending
        if name.lower() in self.queried_titles:
            tid = self.queried_titles[name.lower()]
            if tid == 0:
                return {'state': 0, 'tid': None}

            if tid in self.pending_tids.keys():
                # There already is a lookup underway
                return {'state': 3, 'tid': tid}

            return {'state': 1, 'tid': tid, 'tdate': last_updated, 'name': name}

        if tid == 0:
            (tid, last_updated) = get_tid('from db')
            if not isinstance(last_updated, datetime.date):
                new_fetch = None

            else:
                new_fetch = bool((datetime.date.today() - last_updated).days > 30)

            if tid == -1:
                self.ready = True
                return {'state': -1, 'tid': None}

            elif tid == 0:
                if new_fetch == False:
                    return {'state': 0, 'tid': None}

            elif tid in self.pending_tids.keys():
                # There already is a lookup underway
                return {'state': 3, 'tid': tid}

            elif new_fetch == False:
                return {'state': 1, 'tid': tid, 'tdate': last_updated, 'name': name}

            elif new_fetch and not self.config.opt_dict['only_cache']:
                data = self.query_ttvdb('last_updated', { 'ttvdbid': tid, 'lang': lang})
                if is_data_value([0, 'last updated'], data, datetime.datetime) and \
                    data_value([0, 'last updated'], data, datetime.datetime).date() < last_updated:
                        # No updates on theTVDB
                        return {'state': 1, 'tid': tid, 'tdate': last_updated, 'name': name}

        # First we look for a known alias
        series_name = check_alias()
        if series_name == -1:
            self.ready = True
            return {'state': -1, 'tid': None}

        if tid == 0:
            tid = series_name['tid']

        series_name = series_name['name']
        langs = self.config.ttvdb_langs
        if lang not in self.config.ttvdb_langs and lang in self.lang_list:
            langs.append(lang)

        try:
            aliasses = [series_name.lower(), name.lower()]
            if tid == 0 and not self.config.opt_dict['only_cache']:
                (tid, lu) = get_tid('from ttvdb')

            if tid == 0:
                # No data
                self.store_data('alias',
                                tid = 0,
                                name = series_name,
                                alias = aliasses)
                self.queried_titles[name.lower()] = 0
                self.queried_titles[series_name.lower()] = 0
                return {'state': 0, 'tid': None}

            if tid in self.pending_tids.keys():
                # There already is a lookup underway
                return {'state': 3, 'tid': tid}

            self.queried_titles[name.lower()] = tid
            if series_name.lower() != name.lower():
                self.queried_titles[series_name.lower()] = tid

            #We look for other languages
            data = self.query_ttvdb('seriesid', {'name': series_name, 'lang': 'all'}, chanid)
            if isinstance(data, list) and len(data) > 0:
                for index in range(len(data)):
                    if data_value([index, 'tid'], data, int) == tid and data_value([index, 'lang'], data, str) in langs:
                        if is_data_value([index, 'name'], data, str):
                            aname = data[index]['name'].lower()
                            if not aname in aliasses:
                                aliasses.append(aname)

                            if aname not in self.queried_titles.keys():
                                self.queried_titles[aname] = data_value([index, 'tid'], data, int, 0)

                self.store_data('alias',
                                tid = tid,
                                name = series_name,
                                alias = aliasses)

        except:
            self.config.log([self.config.text('ttvdb', 11), traceback.format_exc()])
            return {'state': 0, 'tid': None}

        # And we retreive the episodes
        epdata = self.get_all_episodes(tid, lang, chanid, name)
        if epdata['state'] == -1:
            return {'state': -1, 'tid': None}

        epdata['tdate'] = datetime.date.today()
        epdata['name'] = series_name
        return epdata

    def get_all_episodes(self, tid, lang='en', chanid=None, name = None):
        try:
            eps = []
            langs = self.config.ttvdb_langs[:]
            if isinstance(lang, list):
                for l in lang:
                    if l not in self.config.ttvdb_langs and l in self.lang_list:
                        langs.append(l)

            elif lang not in self.config.ttvdb_langs and lang in self.lang_list:
                langs.append(lang)

            while 'en' in langs:
                langs.remove('en')

            # We first retrieve the english data in the background
            queryid = self.query_ttvdb('episodes', {'ttvdbid': tid, 'lang': 'en', 'name': name}, chanid, True)
            if queryid == None:
                return {'state': 0, 'tid': tid}

            dtree = self.episodetrees[queryid]
            actkey, keycount = dtree.searchtree.progress_queue.get(True)

            if self.show_progres:
                # It's a call through the commandline so we give feed-back
                dtree.export = True
                qi = []
                qi.append(queryid)
                self.config.log([self.config.text('ttvdb', 11, ('en', name, tid, keycount), type = 'frontend')])
                for i in range(keycount):
                    keyno = dtree.searchtree.progress_queue.get(True)
                    self.config.log([self.config.text('ttvdb', 12, keyno, type = 'frontend')],log_target = 1)

                for l in langs:
                    queryid = self.query_ttvdb('episodes', {'ttvdbid': tid, 'lang': l, 'name': name}, chanid, True)
                    qi.append(queryid)
                    dtree = self.episodetrees[queryid]
                    dtree.export = True
                    keyno = dtree.searchtree.progress_queue.get(True)
                    self.config.log([self.config.text('ttvdb', 11, (l, name, tid, keyno[1]), type = 'frontend')])
                    for i in range(keyno[1]):
                        keyno = dtree.searchtree.progress_queue.get(True)
                        self.config.log([self.config.text('ttvdb', 12, keyno, type = 'frontend')],log_target = 1)

                for queryid in qi:
                    self.episodetrees[queryid].join()

            else:
                dtree.searchtree.show_progress = False
                if keycount > 500:
                    # It's to big so we stay with only the English data collection in the background
                    dtree.export = True
                    return {'state': 2, 'tid': tid, 'queryid':queryid}

                for l in langs:
                    # Collect the other languages in this thread
                    data = self.query_ttvdb('episodes', {'ttvdbid': tid, 'lang': l}, chanid)
                    if not isinstance(data, list):
                        # No data
                        continue

                    ep = self.process_data(data, tid, l)
                    eps.extend(ep[0])

                dtree.join()
                # And collect the data from the first thread
                data = dtree.result
                if len(data) == 0:
                    self.functions.update_counter('fail', self.proc_id, chanid)

                else:
                    ep = self.process_data(data, tid, 'en')
                    eps.extend(ep[0])
                    epc = []
                    for k, v in ep[1].items():
                        epc.append({'tid': tid,'sid': k, 'count': v})

                    self.store_data('epcount', data=epc)

        except:
            self.config.log([self.config.text('ttvdb', 12), traceback.format_exc()])
            return {'state': 0, 'tid': tid}

        # We store the data and let the database tell when it's available
        self.store_data('episodes', data=eps,
                confirm={'task': 'process_ep_info', 'queryid': queryid})
        return {'state': 2, 'tid': tid, 'queryid':queryid}

    def get_season_episode(self, parent = None, data = None, tid = None):
        def prepare_return(rdata, tid, lang):
            tepid = rdata.keys()[0]
            ept = data.get_value('episode title')
            if ept in ('', None):
                ept = data_value([tepid,'episode title', lang], rdata, str)

            if ept in ('', None):
                ept = data_value([tepid,'episode title', 'en'], rdata, str)

            if ept in ('', None):
                for k, v in data_value([tepid,'episode title'], rdata, dict):
                    if v not in ('', None):
                        ept = v
                        break

            return {'state': 1, 'data':{'ttvdbid': tid,
                    'ttvdbepid': tepid,
                    'season': data_value([tepid,'sid'], rdata, int, 0),
                    'episode': data_value([tepid,'eid'], rdata, int, 0),
                    'abs episode':data_value([tepid,'abseid'], rdata, int, 0),
                    'airdate': data_value([tepid,'airdate'], rdata, datetime.date, None),
                    'stitle': series_name,
                    'episode title': ept,
                    'description': data_value([tepid,'description',lang], rdata, str),
                    'star-rating': data_value([tepid,'star-rating'], rdata, float, None)}}

        if not isinstance(data, ProgramNode):
            return {'state': 0, 'tid': -1, 'data': None}

        if parent == None:
            parent = data.channel_config

        if parent.get_opt('disable_ttvdb') or parent.group in self.config.ttvdb_disabled_groups:
            # We do not lookup for regional channels and radio
            return {'state': 0, 'tid': -1, 'data': None}

        lang = self.config.group_language[parent.group]
        series_name = data.get_value('name')
        if tid == None:
            tid = self.get_ttvdb_id(series_name, lang, chanid = parent.chanid)
            if not isinstance(tid, dict) or tid['state'] == 0:
                self.config.log(self.config.text('ttvdb', 13, (series_name, parent.chan_name)), 128)
                # No ID
                return {'state': 0, 'tid': 0, 'data': None}

            elif tid['state'] == -1:
                # Quit signaled
                return {'state': -1, 'tid': -1, 'data': None}

            elif tid['state'] in (2, 3):
                # Request pending
                return tid

            series_name = tid['name']
            tid = tid['tid']

        eptitle = data.get_value('episode title')
        epno = data.get_value('episode')
        seno = data.get_value('season')
        # First if season and episode are known
        if data.is_set('episode') and data.is_set('season'):
            eps = self.get_cache_return('ep_by_id',  tid = tid, eid = epno, sid = seno)
            if eps == -1:
                return {'state': -1, 'tid': -1, 'data': None}

            if tid in eps.keys() and len(eps[tid]) == 1:
                # We only got one match so we return it
                self.config.log(self.config.text('ttvdb', 14,
                    (data.get_value('name'), data.get_value('episode title'))), 24)
                return prepare_return(eps.values()[0], tid, lang)

        # Next we just look for a matching subtitle (if set)
        if data.is_set('episode title') and eptitle != '':
            eid = self.get_cache_return('ep_by_title', **{'tid': tid, 'episode title': eptitle})
            if eid == -1:
                return {'state': -1, 'tid': -1, 'data': None}

            if tid in eid.keys() and len(eid[tid]) == 1:
                # We only got one match so we return it
                self.config.log(self.config.text('ttvdb', 14,
                    (data.get_value('name'), data.get_value('episode title'))), 24)
                return prepare_return(eid.values()[0], tid, lang)

        # What can we find on season/episode
        qdict = {'tid': tid}
        if data.is_set('episode'):
            qdict['eid'] = epno

        if data.is_set('season'):
            qdict['sid'] = seno

        eps = self.get_cache_return('ep_by_id', **qdict )
        if eps == -1:
            return {'state': -1, 'tid': -1, 'data': None}

        if tid in eps.keys() and len(eps[tid]) == 1:
            # We only got one match so we return it
            self.config.log(self.config.text('ttvdb', 14,
                (data.get_value('name'), data.get_value('episode title'))), 24)
            return prepare_return(eps.values()[0], tid, lang)

        # And on absolute episode numbers
        if data.is_set('episode'):
            absep = self.get_cache_return('ep_by_id', tid = tid, abseid = epno )
            if absep == -1:
                return {'state': -1, 'tid': -1, 'data': None}

            if tid in absep.keys() and len(absep[tid]) == 1:
                # We only got one match so we return it
                self.config.log(self.config.text('ttvdb', 14,
                    (data.get_value('name'), data.get_value('episode title'))), 24)
                return prepare_return(absep.values()[0], tid, lang)

        if data.is_set('episode title') and eptitle != '' and  tid in eps.keys() and len(eps[tid]) > 0:
            # Now we get a list of episodes matching what we already know
            # and compare with confusing characters removed
            subt = re.sub('[-,. ]', '', self.functions.remove_accents(data.get_value('episode title')).lower())
            ep_dict = {}
            ep_list = []
            for ep in eps[tid].values():
                for l, ept in ep['episode title'].items():
                    if ept == '':
                        continue

                    s = re.sub('[-,. ]', '', self.functions.remove_accents(ept).lower())
                    ep_list.append(s)
                    ep_dict[s] = ep
                    if s == subt:
                        self.config.log(self.config.text('ttvdb', 14,
                            (data.get_value('name'), data.get_value('episode title'))), 24)
                        return prepare_return(ep, tid, lang)

            # And finally we try a difflib match
            match_list = difflib.get_close_matches(subt, ep_list, 1, 0.7)
            if len(match_list) > 0:
                ep = ep_dict[match_list[0]]
                self.config.log(self.config.text('ttvdb', 14,
                    (data.get_value('name'), data.get_value('episode title'))), 24)
                return prepare_return(ep, tid, lang)

        self.config.log(self.config.text('ttvdb', 15,
            (data.get_value('name'), data.get_value('episode title'), parent.chan_name)), 128)
        return {'state': 0, 'tid': tid, 'data': None}

    def check_ttvdb_title(self, series_name, lang=None, ttvdbid = 0):
        def print_text(tcode, *args):
            if isinstance(tcode, int):
                text = self.config.text('ttvdb', tcode, args, type = 'frontend')

            else:
                text = tcode % args

            print text.encode(self.local_encoding, 'replace')

        if self.config.opt_dict['disable_ttvdb']:
            return(-1)

        self.show_progres = True
        if lang == None:
            lang = self.config.xml_language

        langs = list(set(self.config.group_language.values()))
        langs.extend(self.config.ttvdb_langs)
        if not 'en' in langs:
            langs.append('en')

        if lang in self.lang_list and not lang in langs:
            langs.append(lang)

        # Check if a record exists
        tid = self.get_cache_return('ttvdb_alias', alias=series_name)
        if tid == -1:
            return(-1)

        if ttvdbid != 0:
            data = self.query_ttvdb('last_updated', {'ttvdbid': ttvdbid, 'lang': 'en'})
            if data == None:
                print_text(14, ttvdbid)
                return(0)

            else:
                new_name = data[0]['name']

        if tid != None and len(tid) > 0:
            # It 's already in the DB
            elangs = []
            for ep in tid:
                elangs.append(data_value('lang', ep, str))

            elangs = list(set(elangs))
            langlist = u''
            for l in elangs:
                langlist = u'%s, %s' % (langlist, l)
            print_text(1, series_name,  tid[0]['tid'], tid[0]['name'])
            print_text(2, langlist[2:])
            old_tid = int(tid[0]['tid'])
            for l in elangs:
                langs.append(l)

        else:
            # It's not jet in the database
            print_text(3, series_name)
            old_tid = -1

        if ttvdbid != 0:
            if old_tid == -1:
                print_text(16, ttvdbid, new_name)

            elif ttvdbid != old_tid:
                print_text(13, ttvdbid, new_name)

            else:
                return(0)

            while True:
                print_text(15)
                ans = raw_input()
                if ans in ('y', 'Y'):
                    tid = data[0]
                    break

                elif ans in ('n', 'N'):
                    return(0)

        else:
            try:
                # Print what was found
                series_list = self.query_ttvdb('seriesid', {'name': series_name, 'lang': lang})
                if not isinstance(series_list, list):
                    series_list = [series_list]

                if not is_data_value([0, 'tid'], series_list, int):
                    print_text(4, series_name)
                    return(0)

                print_text(5)
                for s in range(len(series_list)):
                    print_text("%3.0f -> %9.0f: (%s) %s", s+1,
                                data_value([s, 'tid'], series_list, int), \
                                data_value([s, 'lang'], series_list, str), \
                                data_value([s, 'name'], series_list, str))

                # Ask to select the right one
                while True:
                    try:
                        print_text(6)
                        ans = raw_input()
                        selected_id = int(ans)-1
                        if 0 <= selected_id < len(series_list):
                            break

                    except ValueError:
                        if ans.lower() == "q":
                            return(0)

                tid = series_list[selected_id]

            except:
                traceback.print_exc()
                return(-1)

        try:
            # Get the English name and those for other languages
            langs = list(set(langs))
            aliasses = [series_name.lower()]
            ename = tid['name']
            for l in langs:
                data = self.query_ttvdb('last_updated', {'ttvdbid': tid['tid'], 'lang': l})
                aname = data_value([0, 'name'], data, str)
                if not aname.lower() in aliasses:
                    aliasses.append(aname.lower())

                if l == 'en'and not aname in (None, ''):
                    ename = aname

            if old_tid != int(tid['tid']):
                print_text(7)
                self.store_data('delete ttvdbid', tid = old_tid)

            if len(aliasses) > 0:
                # Add an alias record
                self.store_data('alias',
                                tid = int(tid['tid']),
                                name = ename,
                                alias = aliasses)
                if len(aliasses) == 2:
                    print_text(8, ename, aliasses[0], aliasses[1],  tid['tid'])

                else:
                    print_text(9, ename, aliasses[0],  tid['tid'])

            else:
                print_text(10, ename,  tid['tid'])

        except:
            traceback.print_exc()
            return(-1)

        epdata = self.get_all_episodes(int(tid['tid']), langs, name = ename)
        if epdata['state'] in (0,-1):
            return(-1)

        return(0)
    def is_data_value(self, searchpath, dtype = None, empty_is_false = True):
        return is_data_value(searchpath, self.source_data, dtype, empty_is_false)

    def data_value(self, searchpath, dtype = None, default = None):
        return data_value(searchpath, self.source_data, dtype, default)

# end theTVDB

class FetchData(URLtypes, Thread):
    """
    Generic Class to fetch the data

    The output is a list of programming in order where each row
    contains a dictionary with program information.
    It runs as a separate thread for every source
    """
    def __init__(self, config, proc_id, **source_data):
        self.source_data = source_data
        self.source = self.data_value('name', str)
        Thread.__init__(self, name = 'source-%s'% self.source)
        self.config = config
        self.functions = self.config.fetch_func
        # Flag to stop the thread
        self.quit = False
        self.ready = False
        self.has_started = False
        self.state = 0
        self.active = True
        self.lastrequest = None
        # The ID of the source
        self.proc_id = proc_id
        self.source_lock = RLock()
        # The queue to receive answers on database queries
        self.cache_return = Queue()
        # The queue to receive requests for detail fetches
        self.detail_request = Queue()
        self.pre_merge_request = Queue()
        self.config.queues['source'][self.proc_id] = self.detail_request
        self.thread_type = 'source'
        self.config.threads.append(self)
        self.page_status = dte.dtDataOK
        self.fetch_counter = 0

        self.all_channels = {}
        self.channels = {}
        self.chanids = {}
        self.all_chanids = {}
        self.channel_loaded = {}
        self.day_loaded = {}
        self.day_present = {}
        self.page_loaded = {}
        self.program_data = {}
        self.chan_count = 0
        self.fetch_ordinal = None
        self.site_tz = self.config.utc_tz
        self.offset_shift = 0
        self.item_count = 0
        self.current_item_count = 0
        self.total_item_count = 0
        self.groupitems = {}

        self.test_output = sys.stdout
        self.print_tags = False
        self.print_searchtree = False
        self.show_parsing = False
        self.print_roottree = False
        self.roottree_output = self.test_output
        self.show_result = False
        self.raw_output = self.test_output
        self.data_output = self.test_output
        self.new_cattrans = None
        self.cattrans_type = self.source_data.get('cattrans_type', None)
        self.detail_keys = []
        self.data = None
        self.rawdata = None
        self.update_base = []

        self.datatrees = {}
        try:
            self.config.sourceid_by_name[self.source] = self.proc_id
            self.language = self.source_data.get('language', 'en')
            self.is_virtual = self.source_data.get('is_virtual', False)
            self.detail_processor = self.source_data.get('detail_processor', False)
            self.site_tz = self.source_data.get('site-tz', pytz.utc)
            if self.detail_processor:
                if self.proc_id not in self.config.detail_sources:
                    self.detail_processor = False

                else:
                    self.config.detail_keys[self.proc_id] = {}
                    self.detail_processor = False
                    if 'detail' in self.source_data["detail_defs"]:
                        self.detail_processor = True
                        self.update_base = self.source_data["detail"].get("update-base", [])
                        self.detail_keys = self.source_data['detail']['provides']
                        self.config.detail_keys[self.proc_id]['detail'] = self.detail_keys
                        for k in self.detail_keys:
                            if k not in self.config.detail_keys['all']:
                                self.config.detail_keys['all'].append(k)

                    if 'detail2' in self.source_data["detail_defs"]:
                        self.detail_processor = True
                        self.detail2_keys = self.source_data['detail2']['provides']
                        self.config.detail_keys[self.proc_id]['detail2'] = self.detail2_keys
                        for k in self.detail2_keys:
                            if k not in self.config.detail_keys['all']:
                                self.config.detail_keys['all'].append(k)

            if self.proc_id in self.config.detail_sources and not self.detail_processor:
                self.config.detail_sources.remove(self.proc_id)

        except:
            self.config.validate_option('disable_source', value = self.proc_id)
            traceback.print_exc()

    def run(self):
        """The grabing thread"""
        # First some generic initiation that couldn't be done earlier in __init__
        self.state = 1
        detail_ids = {}
        idle_timeout = 900
        self.lastrequest = None
        if self.detail_processor:
            detail_idx = self.config.detail_sources.index(self.proc_id)

        def check_queue():
            # If the queue is empty
            pre_merge = False
            if not self.pre_merge_request.empty():
                qval = self.pre_merge_request.get()
                #~ self.config.log('f: Receiving pre_merge\n')
                pre_merge = True

            elif not self.detail_request.empty():
                qval = self.detail_request.get()

            else:
                self.functions.sleep()
                # if one of the previous detail sources in the order is still alive we wait for potential followup requests
                for ds in range(detail_idx):
                    if self.config.channelsource[self.config.detail_sources[ds]].is_alive():
                        return 0, pre_merge

                # Check if all channels are ready
                for channel in self.config.channels.values():
                    if channel.is_alive() and not channel.ready:
                        return 0, pre_merge

                # All channels are ready, so if there is nothing in the queue
                self.ready = True
                return -1, pre_merge

            self.lastrequest = self.config.in_output_tz('now')
            try:
                if self.quit:
                    return -1, pre_merge

                if qval['task'] == 'quit':
                    return -1, pre_merge

                # Is this the closing item for the channel?
                elif qval[ 'task'] == 'last_one':
                    if pre_merge:
                        #~ self.config.log('f: Terminating pre_merge for "%s" on "%s"\n' %
                            #~ (qval['parent'].name, self.name))
                        qval['parent'].pre_merge.put({'last_detail': self.proc_id})

                    else:
                        for ds in range(detail_idx + 1, len(self.config.detail_sources)):
                            ds_id = self.config.detail_sources[ds]
                            if self.config.channelsource[ds_id].is_alive():
                                self.config.queues['source'][ds_id].put(qval)
                                break

                        else:
                            qval['parent'].detail_return.put('last_detail')
                            qval['parent'].detail_data.set()

                    return 0, pre_merge

                else:
                    return qval, pre_merge

            except Empty:
                return 0, pre_merge

        try:
            self.init_channel_source_ids()
            self.has_started = True
            # Check if the source is not deactivated and if so set them all loaded
            if not (self.proc_id in self.config.opt_dict['disable_source'] or self.is_virtual):
                # Load and proccess al the program pages
                self.load_pages()

            self.ready = True
            self.set_loaded('channel')
            self.state = 0
            if self.config.write_info_files:
                self.config.infofiles.check_new_channels(self, self.config.source_channels)

        except:
            self.config.queues['log'].put({'fatal': [self.config.text('IO', 14), \
                traceback.format_exc(), '\n'], 'name': self.source})

            self.set_loaded('channel')
            self.state = 0
            self.ready = True
            return(98)

        try:
            if self.detail_processor and  not self.proc_id in self.config.opt_dict['disable_detail_source']:
                # We process detail requests, so we loop till we are finished
                self.state = 4
                self.lastrequest = self.config.in_output_tz('now')
                while True:
                    if self.quit:
                        self.ready = True
                        break

                    queue_val, pre_merge = check_queue()
                    if queue_val == -1:
                        # We Quit
                        break

                    if queue_val == 0 or not isinstance(queue_val, dict):
                        # We check again
                        continue

                    tdict = queue_val
                    parent = tdict['parent']
                    detail_ids = tdict['detail_ids']
                    logstring = tdict['logstring']
                    # be nice to the source site
                    self.functions.sleep()
                    try:
                        detailed_program = self.load_detailpage('detail', detail_ids[self.proc_id], parent)

                    except:
                        detailed_program = None
                        self.config.log([self.config.text('fetch', 23,
                            (detail_ids[self.proc_id]['detail_url'], )), traceback.format_exc()], 1)

                    # It failed! Check for a detail2 page
                    #~ if detailed_program == None and 'detail2' in self.source_data["detail_defs"]:
                        #~ try:
                            #~ detailed_program = self.load_detailpage('detail2', detail_ids[self.proc_id], parent)

                        #~ except:
                            #~ detailed_program = None
                            #~ self.config.log([self.config.text('fetch', 24, (detail_ids[self.proc_id]['detail_url'], )), traceback.format_exc()], 1)

                    # It failed! We check for alternative detail sources
                    if detailed_program == None:
                        for ds in range(detail_idx + 1, len(self.config.detail_sources)):
                            ds_id = self.config.detail_sources[ds]
                            if self.config.channelsource[ds_id].is_alive() and ds_id in detail_ids.keys():
                                self.config.queues['source'][ds_id].put(queue_val)
                                self.functions.update_counter('queue', ds_id,  parent.chanid)
                                break

                        else:
                            self.config.log(self.config.text('fetch', 31,
                                (self.source, parent.chan_name, tdict['counter'], logstring), type = 'report'), 8, 1)

                        self.functions.update_counter('queue', self.proc_id,  parent.chanid, False)
                        continue

                    # Success
                    detailed_program['sourceid'] = self.proc_id
                    detailed_program['name'] = detail_ids[self.proc_id]['name']
                    detailed_program['channelid'] = detail_ids[self.proc_id]['channelid']
                    detailed_program['prog_ID'] = detail_ids[self.proc_id]['prog_ID']
                    detailed_program['gen_ID'] = detail_ids[self.proc_id]['gen_ID']
                    if pre_merge:
                        self.config.log(self.config.text('fetch', 35,
                            (self.source, parent.chan_name, tdict['counter'], logstring), type = 'report'), 8, 1)
                        parent.pre_merge.put({'source': self.proc_id,
                            'data': detailed_program, 'counter': tdict['counter']})

                    else:
                        self.config.log(self.config.text('fetch', 32,
                            (self.source, parent.chan_name, tdict['counter'], logstring), type = 'report'), 8, 1)
                        parent.detail_return.put({'source': self.proc_id,
                            'data': detailed_program, 'counter': tdict['counter']})

                    self.functions.update_counter('queue', self.proc_id,  parent.chanid, False)

            self.state = 0
            self.ready = True

        except:
            if self.proc_id in detail_ids.keys() and 'detail_url' in detail_ids[self.proc_id].keys():
                self.config.queues['log'].put(
                    {'fatal': [self.config.text('IO', 15, (detail_ids[self.proc_id]['detail_url'])), \
                    traceback.format_exc(), '\n'], 'name': self.source})

            else:
                self.config.queues['log'].put({'fatal': [self.config.text('IO', 16),
                    traceback.format_exc(), '\n'], 'name': self.source})

            self.state = 0
            self.ready = True
            return(98)

    # The fetching functions
    def init_channel_source_ids(self):
        """Get the list of requested channels for this source from the channel configurations"""
        def check_for_channelid(cid, chan = None, is_child = False):
            if chan == None:
                if cid in self.config.channels.keys():
                    chan = self.config.channels[cid]

                else:
                    return

            channelid = chan.get_channelid(self.proc_id)
            if channelid != '':
                self.groupitems[channelid] = 0
                self.program_data[channelid] = []
                # Unless it is in empty channels we add it else set it ready
                if channelid in self.source_data['empty_channels'] or \
                    self.proc_id in chan.opt_dict['disable_source']:
                    self.set_loaded('channel', channelid)

                else:
                    if is_child:
                        chan.is_child = True

                    self.channels[cid] = channelid
                    if not channelid in self.all_chanids.keys():
                        self.all_chanids[channelid] = [cid]

                    elif not cid in self.all_chanids[channelid]:
                        self.all_chanids[channelid].append(cid)

        self.current_date = datetime.datetime.now(pytz.utc)
        self.current_sitedate = self.config.in_tz(self.current_date, self.site_tz)
        self.current_fetchdate = self.config.in_fetch_tz(self.current_date)
        self.current_ordinal = self.current_fetchdate.toordinal()
        self.sourcedbdata = self.get_cache_return('sources',
                        sourceid = self.proc_id,
                        name = self.source)
        if self.source_data['alt-url-code'] != None and self.sourcedbdata['use_alt_url']:
            for ptype in self.source_data["data_defs"]:
                self.source_data[ptype]['url'] = self.source_data[ptype]['alt-url']

        for chanid, channel in self.config.channels.items():
            # Is the channel active and this source for the channel not disabled
            if channel.active:
                # Is there a channelid for this channel
                check_for_channelid(chanid, channel)
                # Does the channel have child channels
                if chanid in self.config.combined_channels.keys():
                    # Then see if any of the childs has a sourceid for this source and does not have this source disabled
                    for c in self.config.combined_channels[chanid]:
                        check_for_channelid(c['chanid'], is_child = True)

        for channelid, chanidlist in self.all_chanids.items():
            if len(chanidlist) == 1:
                self.chanids[channelid] = chanidlist[0]

            else:
                for chanid in chanidlist:
                    if not self.config.channels[chanid].is_virtual_sub:
                        self.chanids[channelid] = chanid
                        break

                else:
                    self.chanids[channelid] = chanidlist[0]

        # To limit the output to the requested channels
        if "channelid" in self.source_data["base"]["value-filters"].keys() \
          and isinstance(self.source_data["base"]["value-filters"]["channelid"], list):
            self.source_data["base"]["value-filters"]["channelid"].extend(list(self.chanids.keys()))
            self.source_data["base"]["value-filters"]["channelid"].extend(list(self.source_data['alt-channels'].keys()))

    def get_page_data(self, ptype, **pdata):
        """
        Here for every fetch, the url is gathered, the page retreived and
        together with the data definition inserted in the DataTree module
        The then by the DataTree extracted data is return
        """
        def switch_url():
            self.store_data('toggle_alt_url')
            self.sourcedbdata['use_alt_url'] = not self.sourcedbdata['use_alt_url']
            for ptype in self.source_data["data_defs"]:
                if self.sourcedbdata['use_alt_url']:
                    self.source_data[ptype]['url'] = self.source_data[ptype]['alt-url']
                    if ptype in self.datatrees.keys():
                        self.datatrees[ptype].data_def['url'] = self.source_data[ptype]['url']

                else:
                    self.source_data[ptype]['url'] = self.source_data[ptype]['normal-url']
                    if ptype in self.datatrees.keys():
                        self.datatrees[ptype].data_def['url'] = self.source_data[ptype]['url']

        def update_counter(ptype, pstatus = "fail", tekst = None):
            if ptype in self.config.data_def_names["detail"]:
                c = pdata['channel'] if ('channel' in pdata.keys()) else None
                if pstatus == "fetched":
                    self.functions.update_counter('detail', self.proc_id, c)

                elif pstatus == "empty":
                    self.functions.update_counter('empty-detail', self.proc_id, c)

                else:
                    self.functions.update_counter(pstatus, self.proc_id, c)

            elif pstatus == "fetched":
                self.functions.update_counter('base', self.proc_id)

            elif pstatus == "empty":
                self.functions.update_counter('empty-base', self.proc_id)

            else:
                self.functions.update_counter(pstatus, self.proc_id)

            if tekst != None:
                if self.print_roottree:
                    self.roottree_output.write('%s\n' % tekst)

                if self.config.write_info_files:
                    u = url[0]
                    if len(url[3]) > 0:
                        u += '?'
                        for k, v in url[3].items():
                            u += '%s=%s&' % (k, v)

                        u = u[:-1]

                    self.config.infofiles.add_url_failure('%s: %s\n' % (tekst, u))

        self.offset_shift = 0
        self.page_status = dte.dtDataOK
        self.data = None
        incomplete = False
        try:
            if not is_data_value(ptype, self.datatrees, DataTreeShell):
                self.datatrees[ptype] = DataTree(self, self.source_data[ptype], 'always', self.proc_id)

            # For the url we use the fetch timezone and date, not the site timezone and page date
            self.datatrees[ptype].set_timezone(self.config.fetch_tz)
            self.datatrees[ptype].set_current_date(self.current_ordinal)
            # Set the counter for the statistics and some other defaults
            if ptype in self.config.data_def_names["channel"]:
                pdata['start'] = 0
                pdata['end'] = 0
                pdata[ 'offset'] = 0
                counter = ['base', self.proc_id, None]

            elif ptype in self.config.data_def_names["detail"]:
                counter = ['detail', self.proc_id, pdata['channel']]

            else:
                counter = ['base', self.proc_id, None]

            url_type = self.source_data[ptype]["url-type"]
            pdata[ 'cnt-offset'] = pdata.get('cnt_offset', 0)
            for retry in (0, 1):
                # Get the URL
                url = list(self.datatrees[ptype].get_url(pdata, False))
                url.append(self.source_data[ptype]["cookiejar"])
                if self.source_data[ptype]["alt_useragent"]:
                    url[2]['User-Agent'] = self.config.user_agents2[random.randint(0, len(self.config.user_agents2)-1)]

                if url == None:
                    self.config.log([self.config.text('fetch', 25, (ptype, self.source))], 1)
                    update_counter(ptype)
                    self.page_status = dte.dtURLerror
                    return

                if self.print_roottree:
                    if self.roottree_output == sys.stdout:
                        self.roottree_output.write(u'pdata = %s' % pdata)
                        prtdata = ('url', 'encoding', 'accept_header', 'url_data', 'is_json', 'cookiejar')
                        for index in range(len(url)):
                            self.roottree_output.write(('%s = %s'%
                                (prtdata[index], url[index])).encode('utf-8', 'replace'))

                    else:
                        self.roottree_output.write(u'pdata = %s\n' % pdata)
                        prtdata = ('url', 'encoding', 'accept_header', 'url_data', 'is_json', 'cookiejar')
                        for index in range(len(url)):
                            self.roottree_output.write((u'%s = %s\n'% (prtdata[index], url[index])))

                update_counter(ptype, "fetched")
                # Get the Page
                self.page_status, page, pcode = self.functions.get_page(*url)
                # Do an URL swap if needed and try again
                if pcode != None and int(pcode) ==  self.source_data['alt-url-code']:
                    switch_url()

                else:
                    break

            if self.page_status == dte.dtIncompleteRead:
                incomplete = True
                self.page_status = dte.dtDataOK

            if self.page_status == dte.dtEmpty:
                update_counter(ptype, "empty",u'No Data.')
                return

            if self.page_status == dte.dtDataOK:
                # Find the startnode
                if self.datatrees[ptype].init_data(page) or self.datatrees[ptype].searchtree == None:
                    self.config.log([self.config.text('fetch', 26, (ptype, url[0]))], 1)
                    self.page_status = dte.dtDataInvalid

            if self.page_status != dte.dtDataOK:
                if self.page_status in (dte.dtEmpty, dte.dtNoData):
                    update_counter(ptype, "empty",u'No Data.')

                else:
                    if incomplete:
                        self.page_status = dte.dtIncompleteRead

                    update_counter(ptype)
                    if self.print_roottree:
                        self.datatrees[ptype].print_datatree(fobj = self.roottree_output, from_start_node = False)
                        self.roottree_output.write(u'Data Error = %s: %s' %
                            (self.page_status, dte.errortext(self.page_status)))

                return

            if self.print_roottree:
                self.datatrees[ptype].print_datatree(fobj = self.roottree_output, from_start_node = False)

            # We reset the timezone and check if needed on the right date
            self.datatrees[ptype].set_timezone()
            if ptype == 'base':
                # We set the current date
                if self.getsoneday(url_type):
                    cdate = self.current_sitedate.toordinal() + pdata['offset']
                    # We check on the right offset
                    if len(self.source_data[ptype]["data"]["today"]) > 0:
                        cd = self.datatrees[ptype].searchtree.find_data_value(\
                            self.source_data[ptype]["data"]["today"], searchname = 'for the current date')
                        if not isinstance(cd, datetime.date):
                            # The search does not return a valid date, so we throw the page out
                            self.config.log([self.config.text('fetch', 27, (url[0], )),])
                            update_counter(ptype)
                            self.page_status = dte.dtInvalidDate
                            return

                        self.offset_shift = cd.toordinal() - self.current_ordinal
                        if self.offset_shift != 0:
                            # We got the data for a different date then asked. We keep it for now
                            cdate += self.offset_shift
                            self.page_status = dte.dtShiftedDate

                    self.datatrees[ptype].set_current_date(cdate)
                    self.datatrees[ptype].searchtree.set_current_date(cdate)

                # We extract the current _item_count and the total_item_count
                if self.getsrecordgroup(url_type):
                    self.total_item_count = self.datatrees[ptype].searchtree.find_data_value(\
                        self.source_data[ptype]['data']["total-item-count"], searchname = 'for the total-item-count')
                    self.current_item_count = self.datatrees[ptype].searchtree.find_data_value(\
                        self.source_data[ptype]['data']["page-item-count"], searchname = 'for the page-item-count')

            # Extract the data
            dtcode = self.datatrees[ptype].extract_datalist()
            self.page_status = dtcode
            if dtcode != dte.dtDataOK:
                if incomplete:
                    self.page_status = dte.dtIncompleteRead
                    update_counter(ptype, tekst = 'Incomplete Read')

                elif dtcode == dte.dtNoData:
                    update_counter(ptype, "empty", 'No DataTree Data')

                else:
                    self.page_status = dte.dtDataInvalid
                    update_counter(ptype, tekst = 'DataTree Error %s' % (dtcode,))

                return

            self.data = self.datatrees[ptype].result[:]
            self.rawdata = self.datatrees[ptype].searchtree.result[:]
            if self.show_result:
                if self.raw_output == sys.stdout:
                    for p in self.rawdata:
                        if isinstance(p[0], (str, unicode)):
                            self.raw_output.write(p[0].encode('utf-8', 'replace'))
                        else:
                            self.raw_output.write(p[0])
                        for v in range(1,len(p)):
                            if isinstance(p[v], (str, unicode)):
                                self.raw_output.write('    "%s"' % p[v].encode('utf-8', 'replace'))
                            else:
                                self.raw_output.write('    %s' % p[v])

                else:
                    for p in self.rawdata:
                        self.raw_output.write(u'%s\n' % p[0])
                        for v in range(1,len(p)):
                            if isinstance(p[v], (str, unicode)):
                                self.raw_output.write(u'    "%s"\n' % p[v])
                            else:
                                self.raw_output.write(u'    %s\n' % p[v])

            # we extract a channel list if available
            if not self.config.test_modus and ptype == 'base' and \
              "base-channels"in self.source_data["channel_defs"] and len(self.all_channels) == 0:
                self.datatrees[ptype].init_data_def(self.source_data["base-channels"])
                if not self.datatrees[ptype].extract_datalist():
                    self.get_channels(self.datatrees[ptype].result)

                self.datatrees[ptype].init_data_def(self.source_data["base"])

            if len(self.data) == 0:
                self.data = None
                self.page_status = dte.dtNoData
                update_counter(ptype, "empty", 'No DataTree Data')
                return

            return

        except dtWarning as e:
            self.config.log(self.config.text('fetch', 14, (e.message, ptype, self.source)))
            self.functions.update_counter('fail', self.proc_id)
            self.data = None
            self.page_status = dte.dtDataInvalid
            return

        except:
            self.config.log([self.config.text('fetch', 29, (ptype, self.source)), traceback.format_exc()], 1)
            self.functions.update_counter('fail', self.proc_id)
            self.data = None
            self.page_status = dte.dtUnknownError
            return

    def get_channels(self, data_list = None):
        """The code for the retreiving a list of supported channels"""
        self.all_channels ={}
        self.lineup_changes = []
        channel_list = None
        if data_list == None:
            if "channels" in self.source_data["channel_defs"]:
                ptype = "channels"

            elif "channel_list" in self.source_data["channel_defs"]:
                # The channels are defined in the datafile
                self.all_channels = self.source_data["channel_list"]
                return

            elif "base-channels" in self.source_data["channel_defs"]:
                ptype = "base-channels"

            else:
                return

            #extract the data
            for retry in (0, 1):
                self.get_page_data(ptype)
                if self.page_status in (dte.dtDataOK, dte.dtURLerror, dte.dtNoData, dte.dtEmpty):
                    channel_list = self.data
                    break

        else:
            # The list is extracted from a base page
            ptype = "base-channels"
            channel_list = data_list

        if isinstance(channel_list, list):
            empty_channels = copy(self.source_data['empty_channels'])
            chanids = {}
            for chanid, channel in self.config.channels.items():
                channelid = channel.get_channelid(self.proc_id)
                if channelid != '':
                    chanids[channelid] = chanid

            channelids = {}
            for chanid, channelid in self.config.source_channels[self.proc_id].items():
                channelids[channelid] = chanid

            for channel in channel_list:
                # link the data to the right variable, doing any defined adjustments
                if "inactive_channel" in channel.keys() and channel["inactive_channel"]:
                    continue

                if "channelid" in channel.keys():
                    channelid = unicode(channel["channelid"])
                    if channelid in self.source_data['alt-channels'].keys():
                        channel['channelid'] = self.source_data['alt-channels'][channelid][0]
                        channel['name'] = self.source_data['alt-channels'][channelid][1]
                        channelid = unicode(channel['channelid'])

                    self.all_channels[channelid] = channel
                    if self.show_result:
                        if self.data_output == sys.stdout:
                            self.data_output.write('%s: %s' % (channelid, channel['name']))
                            if channelid in empty_channels:
                                empty_channels.remove(channelid)
                                if channelid in channelids.keys():
                                    self.data_output.write('        Marked as empty but still present in "sourcechannels" as "%s"' % channelids[channelid])
                                    self.lineup_changes.append('Empty channelID "%s" on %s still present in "sourcechannels" as "%s"\n' \
                                        % (channelid, self.source, channelids[channelid]))

                                else:
                                    self.data_output.write('        Marked as empty')

                            elif channelid in chanids.keys():
                                self.data_output.write('        chanid: %s' % (chanids[channelid]))
                                del chanids[channelid]

                            else:
                                self.data_output.write('        Without a chanid set in "source_channels"')

                            for k, v in channel.items():
                                if isinstance(v, (str, unicode)):
                                    self.data_output.write('        %s: "%s"'.encode('utf-8', 'replace') % (k, v))
                                else:
                                    self.data_output.write('        %s: %s' % (k, v))

                        else:
                            self.data_output.write('%s: %s\n' % (channelid, channel['name']))
                            if channelid in empty_channels:
                                empty_channels.remove(channelid)
                                if channelid in channelids.keys():
                                    self.data_output.write('        Marked as empty but still present in "sourcechannels" as "%s"\n' % channelids[channelid])
                                    self.lineup_changes.append('Empty channelID "%s" on %s still present in "sourcechannels" as "%s"\n' \
                                        % (channelid, self.source, channelids[channelid]))

                                else:
                                    self.data_output.write('        Marked as empty\n')

                            elif channelid in chanids.keys():
                                self.data_output.write('        chanid: %s\n' % (chanids[channelid]))
                                del chanids[channelid]

                            else:
                                self.data_output.write('        Without a chanid set in "source_channels"\n')

                            for k, v in channel.items():
                                if isinstance(v, (str, unicode)):
                                    self.data_output.write('        %s: "%s"\n' % (k, v))
                                else:
                                    self.data_output.write('        %s: %s\n' % (k, v))

                    elif self.config.write_info_files:
                        if channelid in self.source_data['empty_channels'] and channelid in channelids.keys():
                            self.lineup_changes.append('Empty channelID "%s" on %s still present in "sourcechannels" as "%s"\n' \
                                % (channelid, self.source, channelids[channelid]))

        else:
            self.config.log(self.config.text('fetch', 30, (self.source, )))
            return 69

    def load_pages(self):
        """The code for the actual Grabbing and dataprocessing of the base pages"""
        def log_fetch():
            log_array = []
            if self.getsonechannel(url_type):
                log_array =['\n', self.config.text('fetch', 1, \
                    (self.config.channels[chanid].chan_name, self.config.channels[chanid].xmltvid , \
                    (self.config.channels[chanid].get_opt('compat') and self.config.compat_text or ''), self.source), type = 'report')]

                if self.getsoneday(url_type):
                    log_array.append(self.config.text('fetch', 4, (channel_cnt, len(self.channels), offset, self.config.opt_dict['days']), type = 'report'))

                elif self.getsalldays(url_type):
                    log_array.append(self.config.text('fetch', 5, (channel_cnt, len(self.channels), '6'), type = 'report'))

                elif self.getsdaygroup(url_type):
                    log_array.append(self.config.text('fetch', 6, (channel_cnt, len(self.channels), page_idx, len(fetch_range)), type = 'report'))

                elif self.getsrecordgroup(url_type):
                    log_array.append(self.config.text('fetch', 7, (channel_cnt, len(self.channels), self.config.opt_dict['days'], base_count), type = 'report'))

            else:
                if self.getsallchannels(url_type):
                    log_array =['\n', self.config.text('fetch', 2, (len(self.channels), self.source), type = 'report')]

                elif self.getschannelgroup(url_type):
                    log_array =['\n', self.config.text('fetch', 3, (channelgrp, self.source), type = 'report')]

                else:
                    return

                if self.getsoneday(url_type):
                    log_array.append(self.config.text('fetch', 8, (offset, self.config.opt_dict['days']), type = 'report'))

                elif self.getsalldays(url_type):
                    log_array.append(self.config.text('fetch', 9, (self.config.opt_dict['days'],), type = 'report'))

                elif self.getsdaygroup(url_type):
                    log_array.append(self.config.text('fetch', 10, (page_idx, len(fetch_range)), type = 'report'))

                elif self.getsrecordgroup(url_type):
                    log_array.append(self.config.text('fetch', 11, (self.config.opt_dict['days'], base_count), type = 'report'))

            self.config.log(log_array, 2)
        # end log_fetch()

        def log_fail():
            if url_type == 1:
                self.config.log(self.config.text('fetch', 15, (self.config.channels[chanid].chan_name, self.source, offset), type = 'report'))

            elif url_type == 2:
                self.config.log(self.config.text('fetch', 19, (offset, self.source), type = 'report'))

            elif url_type == 3:
                self.config.log(self.config.text('fetch',23 , (channelgrp, self.source, offset), type = 'report'))

            elif url_type == 5:
                self.config.log(self.config.text('fetch', 16, (self.config.channels[chanid].chan_name, self.source), type = 'report'))

            elif url_type == 6:
                self.config.log(self.config.text('fetch', 20, (self.source, ), type = 'report'))

            elif url_type == 7:
                self.config.log(self.config.text('fetch',24 , (channelgrp, self.source), type = 'report'))

            elif url_type == 9:
                self.config.log(self.config.text('fetch', 17, (self.config.channels[chanid].chan_name, self.source, page_idx), type = 'report'))

            elif url_type == 10:
                self.config.log(self.config.text('fetch', 21, (page_idx, self.source), type = 'report'))

            elif url_type == 11:
                self.config.log(self.config.text('fetch',25 , (channelgrp, self.source, page_idx), type = 'report'))

            elif url_type == 13:
                self.config.log(self.config.text('fetch', 18, (self.config.channels[chanid].chan_name, self.source, base_count), type = 'report'))

            elif url_type == 14:
                self.config.log(self.config.text('fetch', 22, (base_count, self.source), type = 'report'))

            elif url_type == 15:
                self.config.log(self.config.text('fetch',26 , (channelgrp, self.source, base_count), type = 'report'))
        # end log_fail()

        def get_weekstart(ordinal = None, offset = 0, sow = None):
            if sow == None:
                return offset

            if ordinal == None:
                ordinal = self.current_ordinal

            weekday = int(datetime.date.fromordinal(ordinal + offset).strftime('%w'))
            first_day = offset + sow - weekday
            if weekday < sow:
                first_day -= 7

            return first_day
        # end get_weekstart()

        def get_counter(total):
            self.fetch_counter += 1
            return 100*float(self.fetch_counter)/float(total)
        # end get_counter()

        def do_final_processing(channelid):
            chanid = self.chanids[channelid]
            if not chanid in self.config.channelprogram_rename.keys():
                self.config.channelprogram_rename[chanid] = {}

            good_programs = []
            pgaps = []
            # Some sanity Check
            if len(self.program_data[channelid]) > 0:
                self.program_data[channelid].sort(key=lambda program: (program['start-time']))
                plen = len(self.program_data[channelid])
                last_stop = None
                min_gap = datetime.timedelta(minutes = 30)
                if self.config.channels[chanid].get_opt('pre_merge', self.proc_id):
                    url_list = [None]
                    self.fetch_counter = 0
                    fetch_order = list(range(plen))
                    random.shuffle(fetch_order)
                    for index in fetch_order:
                        counter = get_counter(plen)
                        p = self.program_data[channelid][index]
                        prog_id = p.get('prog_ID', None)
                        if prog_id == None:
                            continue

                        detail_url = p.get('detail_url', None)
                        if detail_url in url_list:
                            continue

                        cache_detail = self.get_cache_return('programdetails',
                                        sourceid = self.proc_id,
                                        channelid = channelid,
                                        prog_ID = prog_id)
                        if cache_detail == -1:
                            return -1

                        elif len(cache_detail) > 0:
                            for k in self.update_base:
                                cdval = cache_detail[0].get(k, None)
                                if cdval != None:
                                    p[k] = cdval
                                    if k == 'length':
                                        p['stop from length'] = True
                                        p['stop-time'] = p['start-time'] + p['length']

                        elif detail_url != None:
                            url_list.append(detail_url)
                            logstring = u' %s: %s' % (p['start-time'].strftime('%d %b %H:%M'), p['name'])

                            sources = {self.proc_id: {
                                    'chanid': chanid,
                                    'channelid': channelid,
                                    'name': p['name'],
                                    'detail_url': detail_url,
                                    'prog_ID': prog_id,
                                    'gen_ID': p['gen_ID']},
                                'channelid': channelid,
                                'chanid': chanid,
                                'name': p['name'],
                                'detail_url': detail_url,
                                'prog_ID': prog_id,
                                'gen_ID': p['gen_ID']}

                            self.pre_merge_request.put(
                                {'task':'get_details','detail_ids': sources, 'logstring': logstring,
                                    'counter': counter, 'parent': self.config.channels[chanid]})

                            #~ self.config.log('f: fetching: %s\n' % detail_url)

                for index in range(plen):
                    p = self.program_data[channelid][index]
                    if not 'name' in p.keys() or not isinstance(p['name'], (unicode, str)) or p['name'] == u'':
                        continue

                    p['name'] = unicode(p['name'])
                    pname = p['name'].lower().strip()
                    if pname in self.config.channelprogram_rename[chanid].keys():
                        p['name'] = self.config.channelprogram_rename[chanid][pname]

                    if index < plen - 1:
                        p2 = self.program_data[channelid][index + 1]
                        if 'stop from length' in p.keys() and p['stop from length'] and \
                            not 'last of the page' in p.keys():
                            if p['stop-time'] > p2['start-time']:
                                p['stop-time'] = copy(p2['start-time'])

                        if not 'stop-time' in p.keys() or not isinstance(p['stop-time'], datetime.datetime):
                            if 'last of the page' in p.keys() and not \
                                ('length' in self.update_base or 'stop-time' in self.update_base):
                                continue

                            p['stop-time'] = copy(p2['start-time'])

                        if not 'length' in p.keys() or not isinstance(p['length'], datetime.timedelta):
                            p['length'] = p['stop-time'] - p['start-time']

                        if 'last of the page' in p.keys():
                            # Check for a program split by the day border
                            if p[ 'name'].lower() == p2[ 'name'].lower() and p['stop-time'] >= p2['start-time'] \
                              and ((not 'episode title' in p and not 'episode title' in p2) \
                                or ('episode title' in p and 'episode title' in p2 \
                                and p[ 'episode title'].lower() == p2[ 'episode title'].lower())):
                                    p2['start-time'] = copy(p['start-time'])
                                    continue

                    elif index == plen - 1 and'stop-time' in p.keys() and \
                      isinstance(p['stop-time'], datetime.datetime):
                        if not 'length' in p.keys() or not isinstance(p['length'], datetime.timedelta):
                            p['length'] = p['stop-time'] - p['start-time']

                        while p['length'] > tdd:
                            p['length'] -= tdd

                        p['stop-time'] = p['start-time'] + p['length']

                    else:
                        continue

                    if p['stop-time'] <= p['start-time']:
                        continue

                    if last_stop != None and (p['start-time'] - last_stop) > min_gap:
                        pgaps.append((last_stop, copy(p['start-time']), last_name, p['name'].lower()))

                    last_stop = copy(p['stop-time'])
                    last_name = p['name'].lower()
                    good_programs.append(p)

                self.pre_merge_request.put({'task':'last_one', 'parent': self.config.channels[chanid]})
                #~ self.config.log('f: Terminating prefetch for "%s" on "%s"\n' %
                    #~ (self.config.channels[chanid].name, self.name))

            # Retrieve what is in the cache with a day earlier and later added
            cache_programs = self.get_cache_return('sourceprograms',
                            sourceid = self.proc_id,
                            channelid = channelid,
                            scandate = range( first_day - 1 , min(max_days, last_day) +1))
            if cache_programs == -1:
                return -1

            # And add in the cached programs
            if len(good_programs) > 0:
                good_programs.sort(key=lambda program: (program['start-time']))
                fetch_start = good_programs[0]['start-time']
                fetch_end = good_programs[-1]['stop-time']
                cache_delete = []
                cache_add = []
                if len(cache_programs) > 0:
                    # Retrieve those days from the cache
                    for day in site_range:
                        if cached[channelid][day] and not self.get_loaded('realday', channelid, day):
                            self.config.log(self.config.text('fetch', 12,
                                (day, self.config.channels[chanid].chan_name, self.source), type = 'report'), 2)

                    for p in cache_programs:
                        p['source'] = self.source
                        p['chanid'] = chanid
                        p['channelid'] = channelid
                        p['channel']  = self.config.channels[chanid].chan_name
                        p['length'] = p['stop-time'] - p['start-time']
                        p['from cache'] = True
                        gn = '' if not 'group name' in p or p['group name'] == None else p['group name']
                        et = '' if not 'episode title' in p or p['episode title'] == None else p['episode title']
                        p['title'] = (gn, p['name'], et)
                        g = '' if not 'genre' in p or p['genre'] == None else p['genre']
                        sg = '' if not 'subgenre' in p or p['subgenre'] == None else p['subgenre']
                        p['genres'] = (g, sg)
                        if 'group' in p.keys() and not p['group'] in (None, ''):
                            self.groupitems[channelid] += 1

                        if p['stop-time'] <= fetch_start or p['start-time'] >= fetch_end:
                            cache_add.append(p)
                            continue

                        elif p['start-time'] <= fetch_start and \
                            (fetch_start - p['start-time']) > (p['stop-time'] - fetch_start) and \
                            p['name'].lower() != good_programs[0]['name'].lower():
                            cache_add.append(p)
                            continue

                        elif p['stop-time'] >= fetch_end and \
                            (fetch_end - p['start-time']) < (p['stop-time'] - fetch_end) and \
                            p['name'].lower() != good_programs[-1]['name'].lower():
                            cache_add.append(p)
                            continue

                        for pg in pgaps:
                            if pg[0] <= p['start-time'] <= pg[1] and pg[0] <= p['stop-time'] <= pg[1]:
                                cache_add.append(p)
                                break

                            elif pg[0] <= p['start-time'] <= pg[1] and \
                                (pg[1] - p['start-time']) > (p['stop-time'] - pg[1]) and p['name'].lower() != pg[3]:
                                cache_add.append(p)
                                break

                            elif pg[0] <= p['stop-time'] <= pg[1] and \
                                (p['stop-time'] - pg[0]) > (pg[0] - p['start-time']) and p['name'].lower() != pg[2]:
                                cache_add.append(p)
                                break

                        else:
                            cache_delete.append(p['start-time'])

                    # Delete the new fetched programs from the cache
                    if len(cache_delete) > 0:
                        self.store_data('delete_programs',
                                        **{'sourceid': self.proc_id,
                                        'channelid': channelid,
                                        'start-time': cache_delete})

                # Update the cache
                new_ranges= []
                for day in site_range:
                    if self.get_loaded('realday', channelid, day):
                        new_ranges.append(day)

                self.store_data('add_programs',
                            programs = deepcopy(good_programs),
                            channelid = channelid,
                            laststop = fetch_end,
                            fetcheddays = new_ranges)
                good_programs.extend(cache_add)

            elif len(cache_programs) > 0:
                for p in cache_programs:
                    p['source'] = self.source
                    p['chanid'] = chanid
                    p['channelid'] = channelid
                    p['channel']  = self.config.channels[chanid].chan_name
                    p['length'] = p['stop-time'] - p['start-time']
                    p['from cache'] = True
                    if 'group' in p.keys() and not p['group'] in (None, ''):
                        self.groupitems[channelid] += 1

                    good_programs.append(p)

            # Add any repeat group
            if len(good_programs) > 0:
                good_programs.sort(key=lambda program: (program['start-time']))
                if self.groupitems[channelid] > 0:
                    group_start = False
                    for p in good_programs[:]:
                        if 'group' in p.keys():
                            # Collecting the group
                            if not group_start:
                                group = []
                                start = p['start-time']
                                group_start = True

                            if 'proc_ID' in p:
                                p['gen_ID'] = p['proc_ID']
                            group.append(p.copy())
                            group_duur = p['stop-time'] - start

                        elif group_start:
                            # Repeating the group
                            group_start = False
                            group_eind = p['start-time']
                            group_length = group_eind - start
                            if group_length > datetime.timedelta(hours = 12):
                                # Probably a week was not grabbed
                                group_eind -= datetime.timedelta(days = int(group_length.days))

                            repeat = 0
                            while True:
                                repeat+= 1
                                for g in group[:]:
                                    gdict = g.copy()
                                    gdict['start-time'] += repeat*group_duur
                                    if gdict['start-time'] < group_eind:
                                        gdict['stop-time'] += repeat*group_duur
                                        if gdict['stop-time'] > group_eind:
                                            gdict['stop-time'] = group_eind

                                        gdict['length'] = gdict['stop-time'] - gdict['start-time']
                                        gdict['offset'] = self.functions.get_offset(gdict['start-time'])
                                        gdict['scandate'] = self.functions.get_fetchdate(gdict['start-time'])
                                        gdict['prog_ID'] = ''
                                        gdict['rerun'] = True
                                        good_programs.append(gdict)

                                    else:
                                        break

                                else:
                                    continue

                                break

            # And keep only the requested range
            self.program_data[channelid] = []
            for p in good_programs[:]:
                if p['offset'] in full_range:
                    self.program_data[channelid].append(p)

            self.set_loaded('channel', channelid)
            self.set_loaded('day', channelid)
        # end do_final_processing()

        if len(self.channels) == 0  or not "base" in self.source_data["base_defs"]:
            return

        tdd = datetime.timedelta(days=1)
        cached = {}
        laststop = {}
        first_day = self.config.opt_dict['offset']
        last_day = first_day + self.config.opt_dict['days']
        max_days = self.source_data["base"]["max days"]
        if first_day > max_days:
            self.set_loaded('channel')
            return

        full_range = range( first_day, last_day)
        site_range = range( first_day, min(max_days, last_day))
        step_start = first_day
        offset_step = 1
        fetch_range = site_range
        for channelid, chanid in self.chanids.items():
            self.program_data[channelid] = []
            cached[channelid] = self.get_cache_return('fetcheddays',
                            sourceid = self.proc_id,
                            channelid = channelid,
                            scandate = list(full_range))
            if cached[channelid] == -1:
                return -1

            ls = self.get_cache_return('laststop',
                            sourceid = self.proc_id,
                            channelid = channelid)
            if ls == -1:
                return -1

            laststop[channelid] = ls['laststop']  if isinstance(ls, dict) and \
                isinstance(ls['laststop'], datetime.datetime) else None

        url_type = self.source_data["base"]["url-type"]
        # Check which groups contain requested channels
        if self.getschannelgroup(url_type):
            changroups = self.source_data["base"][ "url-channel-groups"]
            fgroup = {}
            for channelgrp in changroups:
                fgroup[channelgrp] = self.get_cache_return('chan_scid',
                                sourceid = self.proc_id,
                                fgroup = channelgrp)
                if fgroup[channelgrp] == -1:
                    return -1

                for chan in fgroup[channelgrp][:]:
                    if not chan['channelid'] in self.chanids.keys():
                        fgroup[channelgrp].remove(chan)

        # Check which days and up to what date are available in the cache
        for channelid, chanid in self.chanids.items():
            self.program_data[channelid] = []
            cached[channelid] = self.get_cache_return('fetcheddays',
                            sourceid = self.proc_id,
                            channelid = channelid,
                            scandate = list(full_range))
            if cached[channelid] == -1:
                return

            ls = self.get_cache_return('laststop',
                            sourceid = self.proc_id,
                            channelid = channelid)
            if ls == -1:
                return -1

            laststop[channelid] = ls['laststop']  if isinstance(ls, dict) and \
                isinstance(ls['laststop'], datetime.datetime) else None

        max_fetch_days = 6
        max_failure_count = 4
        # Just process the days retrieved from the cache
        if self.config.opt_dict['only_cache']:
            for channelid, chanid in self.chanids.items():
                if do_final_processing(channelid) == -1:
                    return -1

            return

        elif self.getsoneday(url_type):
            # vrt.be, tvgids.tv
            if self.getsonechannel(url_type):
                fetch_range = {}
                for channelid, chanid in self.chanids.items():
                    fetch_range[channelid] = []
                    for day in site_range:
                        if day == 0 or cached[channelid][day] != True:
                            fetch_range[channelid].append(day)
                            if len(fetch_range[channelid]) == max_fetch_days:
                                break

            # tvgids.nl, vpro.nl, primo.eu, oorboekje.nl
            elif self.getsallchannels(url_type):
                fetch_range = []
                for day in site_range:
                    for channelid, chanid in self.chanids.items():
                        if day == 0 or cached[channelid][day] != True:
                            fetch_range.append(day)
                            break

                    if len(fetch_range) == max_fetch_days:
                        break

            #humo.be, npo.nl
            elif self.getschannelgroup(url_type):
                fetch_range = {}
                for channelgrp in changroups:
                    fetch_range[channelgrp] = []
                    for day in site_range:
                        for chan in fgroup[channelgrp]:
                            if day == 0 or cached[chan['channelid']][day] != True:
                                fetch_range[channelgrp].append(day)
                                break

                        if len(fetch_range[channelgrp]) == max_fetch_days:
                            break

        elif self.getsalldays(url_type):
            if self.getsonechannel(url_type):
                fetch_range = {}
                for channelid, chanid in self.chanids.items():
                    fetch_range[channelid] = ['all']

            # rtl.nl
            elif self.getsallchannels(url_type):
                fetch_range = ['all']

            elif self.getschannelgroup(url_type):
                # ToDo
                return

        elif self.getsdaygroup(url_type):
            if self.source_data["base"]["url-date-range"] == 'week':
                sow = self.source_data["base"]["url-date-week-start"]
                offset_step = 7
                step_start = get_weekstart(self.current_ordinal, first_day, sow)
                if self.getsonechannel(url_type):
                    fetch_range = {}
                    for channelid, chanid in self.chanids.items():
                        fetch_range[channelid] = []
                        for daygroup in range(step_start, last_day, offset_step):
                            for day in site_range:
                                if day in site_range and (day == 0 or cached[channelid][day] != True):
                                    fetch_range[channelid].append(daygroup)
                                    break

                elif self.getsallchannels(url_type):
                    fetch_range = []
                    for daygroup in range(step_start, last_day, offset_step):
                        for day in site_range:
                            for channelid, chanid in self.chanids.items():
                                if day in site_range and (day == 0 or cached[channelid][day] != True):
                                    fetch_range.append(daygroup)
                                    break

                            else:
                                continue

                            break

                elif self.getschannelgroup(url_type):
                    # ToDo
                    return

            elif isinstance(self.source_data["base"]["url-date-range"], int):
                offset_step = self.source_data["base"]["url-date-range"]
                # nieuwsblad.be
                if self.getsonechannel(url_type):
                    fetch_range = {}
                    for channelid, chanid in self.chanids.items():
                        fetch_range[channelid] = []
                        start = None
                        for day in site_range:
                            if day == 0 or cached[channelid][day] != True:
                                if start == None or day > start + offset_step:
                                    fetch_range[channelid].append(day)
                                    start = day

                elif self.getsallchannels(url_type):
                    fetch_range = []
                    start = None
                    for day in site_range:
                        for channelid, chanid in self.chanids.items():
                            if day == 0 or cached[channelid][day] != True:
                                if start == None or day > start + offset_step:
                                    fetch_range.append(day)
                                    start = day
                                    break

                elif self.getschannelgroup(url_type):
                    # ToDo
                    return

            else:
                return

        elif self.getsrecordgroup(url_type):
            # horizon.nl
            self.item_count = self.source_data['base']['default-item-count']
            if self.getsonechannel(url_type):
                fetch_range = {}
                for channelid, chanid in self.chanids.items():
                    fetch_range[channelid] = []
                    start = None
                    days = 0
                    for day in site_range:
                        if day == 0 or cached[channelid][day] != True:
                            days += 1
                            if start == None:
                                start = day

                        elif start != None:
                            fetch_range[channelid].append((start, days))
                            start = None
                            days = 0

                    if start != None:
                        fetch_range[channelid].append((start, days))

            elif self.getsallchannels(url_type):
                fetch_range = []
                start = None
                days = 0
                for day in site_range:
                    for channelid, chanid in self.chanids.items():
                        if day == 0 or cached[channelid][day] != True:
                            days += 1
                            if start == None:
                                start = day

                            break

                    else:
                        continue

                    if start != None:
                        fetch_range.append((start, days))
                        start = None
                        days = 0

                if start != None:
                    fetch_range.append((start, days))


            elif self.getschannelgroup(url_type):
                # ToDo
                return

        try:
            first_fetch = True
            if self.getsonechannel(url_type):
                maxoffset = {}
                for retry in (0, 1):
                    channel_cnt = 0
                    for channelid, chanid in self.chanids.items():
                        channel_cnt += 1
                        failure_count = 0
                        empty_count = 0
                        fetch_count = 0
                        if not channelid in maxoffset.keys():
                            maxoffset[channelid] = None

                        if self.quit:
                            return

                        if self.get_loaded('channel', channelid):
                            continue

                        if self.getsrecordgroup(url_type):
                            if self.item_count == 0:
                                return

                            base_count = 0
                            for fset in fetch_range[channelid]:
                                if fset == maxoffset[channelid]:
                                    self.config.log(self.config.text('fetch', 39,
                                        (self.source, self.config.channels[chanid].chan_descr)))
                                    break

                                self.current_item_count = self.item_count
                                page_count = 0
                                while self.current_item_count == self.item_count:
                                    if self.quit:
                                        return

                                    # Check if it is already loaded
                                    if self.get_loaded('page', channelid, base_count):
                                        page_count += 1
                                        base_count += 1
                                        fetch_count += 1
                                        continue

                                    log_fetch()
                                    if not first_fetch:
                                        # be nice to the source
                                        self.functions.sleep()

                                    first_fetch = False
                                    self.get_page_data('base', channel = channelid,
                                                                cnt_offset = page_count,
                                                                start = fset[0],
                                                                end = fset[0] + fset[1],
                                                                back = -fset[0],
                                                                ahead = fset[0] +fset[1]-1)

                                    if self.page_status == dte.dtDataOK:
                                        strdata = self.data

                                    elif self.page_status == dte.dtNoData:
                                        # We asume this is the last day with data
                                        maxoffset[channelid] = fset
                                        self.config.log(self.config.text('fetch', 39,
                                            (self.source, self.config.channels[chanid].chan_descr)))
                                        break

                                    else:
                                        if retry == 1:
                                            log_fail()

                                        failure_count += 1
                                        base_count += 1
                                        page_count += 1
                                        fetch_count += 1
                                        if failure_count > max_failure_count:
                                            break

                                        continue

                                    self.parse_basepage(strdata, channelid = channelid)
                                    self.set_loaded('page', channelid, base_count)
                                    page_count += 1
                                    base_count += 1
                                    fetch_count += 1

                                self.set_loaded('day', channelid, range(fset[0], fset[0] + fset[1]))

                        else:
                            page_idx = 0
                            for offset in fetch_range[channelid]:
                                if offset == maxoffset[channelid]:
                                    self.config.log(self.config.text('fetch', 39,
                                        (self.source, self.config.channels[chanid].chan_descr)))
                                    break

                                page_idx += 1
                                # Check if it is already loaded
                                if self.getsdaygroup(url_type):
                                    if self.get_loaded('page', channelid, offset):
                                        continue

                                else:
                                    if self.get_loaded('day', channelid, offset):
                                        continue

                                if not first_fetch:
                                    # be nice to the source
                                    self.functions.sleep()

                                first_fetch = False
                                log_fetch()
                                self.get_page_data('base', channel = channelid,
                                                            offset = offset,
                                                            start = first_day,
                                                            end = min(max_days, last_day),
                                                            back = -first_day,
                                                            ahead = min(max_days, last_day)-1)
                                if self.page_status == dte.dtDataOK:
                                    strdata = self.data

                                elif self.page_status == dte.dtShiftedDate:
                                    strdata = self.data

                                elif self.page_status == dte.dtNoData:
                                    # We asume this is the last page with data
                                    maxoffset[channelid] = offset
                                    self.config.log(self.config.text('fetch', 39,
                                        (self.source, self.config.channels[chanid].chan_descr)))
                                    break

                                else:
                                    if retry == 1:
                                        log_fail()

                                    failure_count += 1
                                    continue

                                if self.getsoneday(url_type):
                                    self.parse_basepage(strdata, offset = offset + self.offset_shift, channelid = channelid)

                                else:
                                    self.parse_basepage(strdata, offset = offset, channelid = channelid)

                                if self.getsoneday(url_type):
                                    self.set_loaded('day', channelid, offset)

                                elif self.getsalldays(url_type):
                                    self.set_loaded('day', channelid)

                                elif self.getsdaygroup(url_type):
                                    self.set_loaded('day', channelid, range(offset, offset + offset_step))
                                    self.set_loaded('page', channelid, offset)


                        if failure_count == 0 or retry == 1:
                            if do_final_processing(channelid) == -1:
                                return -1

            elif self.getsallchannels(url_type):
                for retry in (0, 1):
                    failure_count = 0
                    if self.quit:
                        return

                    if self.getsdaygroup(url_type):
                        # We fetch a set number of  days in one
                        return

                    elif self.getsrecordgroup(url_type):
                        # We fetch a set number of  records in one
                        return

                    else:
                        for offset in fetch_range:
                            if self.quit:
                                return

                            # Check if it is already loaded
                            if self.get_loaded('day', 0, offset):
                                continue

                            if not first_fetch:
                                # be nice to the source
                                self.functions.sleep()

                            first_fetch = False
                            log_fetch()
                            self.get_page_data('base', channels = self.chanids.keys(),
                                                        offset = offset,
                                                        start = first_day,
                                                        end = min(max_days, last_day),
                                                        back = -first_day,
                                                        ahead = min(max_days, last_day)-1)
                            if self.page_status == dte.dtDataOK:
                                strdata = self.data

                            elif self.page_status == dte.dtShiftedDate:
                                strdata = self.data

                            else:
                                if retry == 1:
                                    log_fail()

                                failure_count += 1
                                continue

                            self.set_loaded('day', 0, offset)
                            if self.getsoneday(url_type):
                                self.parse_basepage(strdata, offset = offset + self.offset_shift)

                            else:
                                self.parse_basepage(strdata, offset = offset)

                    if failure_count == 0 or retry == 1:
                        for channelid, chanid in self.chanids.items():
                            if do_final_processing(channelid) == -1:
                                return -1

                        break

            elif self.getschannelgroup(url_type):
                for retry in (0, 1):
                    for channelgrp in self.source_data["base"]["url-channel-groups"]:
                        failure_count = 0
                        if self.quit:
                            return

                        if len(fgroup[channelgrp]) == 0:
                            continue

                        #humo.be
                        if self.getsoneday(url_type):
                            for offset in fetch_range[channelgrp]:
                                if self.quit:
                                    return

                                for chan in fgroup[channelgrp]:
                                    if not self.get_loaded('day', chan['channelid'], offset):
                                        break

                                else:
                                    continue

                                if not first_fetch:
                                    # be nice to the source
                                    self.functions.sleep()

                                first_fetch = False
                                log_fetch()
                                self.get_page_data('base', channelgrp = channelgrp,
                                                            offset = offset,
                                                            start = first_day,
                                                            end = min(max_days, last_day),
                                                            back = -first_day,
                                                            ahead = min(max_days, last_day)-1)
                                if self.page_status == dte.dtDataOK:
                                    strdata = self.data

                                elif self.page_status == dte.dtShiftedDate:
                                    strdata = self.data

                                else:
                                    if retry == 1:
                                        log_fail()

                                    failure_count += 1
                                    continue

                                channelids = self.parse_basepage(strdata,
                                                channelgrp = channelgrp,
                                                offset = offset + self.offset_shift)
                                if isinstance(channelids, list):
                                    self.set_loaded('day', channelids, offset)

                        elif self.getsalldays(url_type):
                            # We fetch all days in one
                            return

                        elif self.getsdaygroup(url_type):
                            # We fetch a set number of  days in one
                            return

                        elif self.getsrecordgroup(url_type):
                            # We fetch a set number of  records in one
                            return


                    if failure_count == 0 or retry == 1:
                        for channelid, chanid in self.chanids.items():
                            if do_final_processing(channelid) == -1:
                                return -1

                        break

        except:
            self.config.log([self.config.text('fetch', 31, (self.source,)),
                                    self.config.text('fetch', 32), traceback.format_exc()], 0)
            self.set_loaded('channel')
            return None

    def parse_basepage(self, fdata, **subset):
        """Process the data retreived from DataTree for the base pages"""
        channelids = []
        last_start = {}
        tdd = datetime.timedelta(days=1)
        tdh = datetime.timedelta(hours=1)
        if isinstance(fdata, list):
            last_stop = None
            for program in fdata:
                if 'channelid' in program.keys():
                    channelid = unicode(program['channelid'])
                    if channelid in self.source_data['alt-channels'].keys():
                        program['channelid'] = self.source_data['alt-channels'][channelid][0]
                        channelid = unicode(program['channelid'])

                elif 'channelid' in subset.keys():
                    channelid = subset['channelid']

                else:
                    continue

                # it's not requested
                if not channelid in self.chanids.keys():
                    continue

                # A list of processed channels to send back
                if not channelid in channelids:
                    channelids.append(channelid)

                if not channelid in last_start.keys():
                    last_start[channelid] = None

                chanid = self.chanids[channelid]
                if not 'prog_ID' in program.keys():
                    program['prog_ID'] = ''

                tdict = {}
                tdict['sourceid'] = self.proc_id
                tdict['source'] = self.source
                tdict['channelid'] = channelid
                tdict['chanid'] = chanid
                tdict['prog_ID'] = ''
                tdict['channel']  = self.config.channels[chanid].chan_name
                tdict['from cache'] = False
                if  not 'name' in program.keys() or program['name'] == None or program['name'] == '':
                    # Give it the Unknown Program Title Name, to mark it as a groupslot.
                    program['name'] = self.config.unknown_program_title
                    tdict['is_gap'] = True
                    #~ self.config.log(self.config.text('fetch', 33,
                        #~ (program['prog_ID'], self.config.channels[chanid].chan_descr, self.source)))
                    #~ continue

                if 'stop-time' in program.keys() and isinstance(program['stop-time'], datetime.datetime):
                    tdict['stop-time'] = program['stop-time']
                elif "alt-stop-time" in program and isinstance(program["alt-stop-time"], datetime.datetime):
                    tdict['stop-time'] = program["alt-stop-time"]

                plength = None
                if "length" in program and isinstance(program['length'], datetime.timedelta):
                    plength = program["length"]
                    tdict["length"] = plength

                if 'start-time' in program.keys() and isinstance(program['start-time'], datetime.datetime):
                    tdict['start-time'] = program['start-time']
                elif "alt-start-time" in program and isinstance(program["alt-start-time"], datetime.datetime):
                    tdict['start-time'] = program["alt-start-time"]
                elif plength != None and 'stop-time' in tdict.keys():
                    tdict['start-time'] = tdict['stop-time'] - plength
                    tdict['start from length'] = True
                elif self.source_data["base"]["data-format"] == "text/html" and \
                    isinstance(last_stop, datetime.datetime):
                    tdict['start-time'] = last_stop
                else:
                    # Unable to determin a Start Time
                    self.config.log(self.config.text('fetch', 34, (program['name'], tdict['channel'], self.source)))
                    continue

                if plength != None:
                    if not 'stop-time' in tdict.keys():
                        tdict['stop-time'] = tdict['start-time'] + plength
                        tdict['stop from length'] = True

                    else:
                        alength = tdict['stop-time'] - tdict['start-time']

                if self.source_data['without-full-timings'] and \
                    self.source_data["base"]["data-format"] == "text/html":
                    # This is to catch the midnight date change for HTML pages with just start(stop) times without date
                    # don't enable it on json pages where the programs are in a dict as they will not be in chronological order!!!
                    if last_start[channelid] == None:
                        last_start[channelid] = tdict['start-time']

                    while tdict['start-time'] < last_start[channelid] - tdh:
                        tdict['start-time'] += tdd

                    last_start[channelid] = tdict['start-time']
                    if 'stop-time' in tdict.keys():
                        while tdict['stop-time'] < tdict['start-time']:
                            tdict['stop-time'] += tdd

                tdict['offset'] = self.functions.get_offset(tdict['start-time'])
                tdict['scandate'] = self.functions.get_fetchdate(tdict['start-time'])
                if self.source_data["base"]["data-format"] == "text/html":
                    if 'stop-time' in tdict.keys():
                        last_stop = tdict['stop-time']
                    else:
                        last_stop = None

                # Add any known value that does not need further processing
                for k, v in self.process_values(program).items():
                    if k in ('channelid', 'video', 'start-time', 'stop-time', 'length'):
                        continue

                    tdict[k] = v

                if 'group' in program.keys() and not program['group'] in (None, ''):
                    self.groupitems[channelid] += 1
                    tdict['group'] = program['group']

                with self.source_lock:
                    self.program_data[channelid].append(tdict)

                #~ self.config.genre_list.append((tdict['genre'].lower(), tdict['subgenre'].lower()))

                if self.show_result:
                    self.print_result(tdict, channelid)

            if len(channelids) > 0:
                for channelid in channelids:
                    if len(self.program_data[channelid]) > 0:
                        self.program_data[channelid][-1]['last of the page'] = True

        return channelids

    def load_detailpage(self, ptype, pdata, parent = None):
        """The code for retreiving and processing a detail page"""
        if pdata['detail_url'] in (None, ''):
            return

        self.get_page_data(ptype, channel = pdata['chanid'], detailid = pdata['detail_url'])
        if self.page_status == dte.dtDataOK:
            strdata = self.data

        else:
            self.config.log(self.config.text('fetch', 35, (pdata['detail_url'], )), 1)
            return

        values = strdata[0]
        if not isinstance(values, dict):
            return

        if not 'genre' in values.keys() and 'org-genre' in pdata.keys():
            values['genre'] = pdata['org-genre']

        if not 'subgenre' in values.keys() and 'org-subgenre' in pdata.keys():
            values['subgenre'] = pdata['org-subgenre']

        tdict = self.process_values(values)
        if self.show_result:
            self.print_result(tdict, pdata['channelid'])

        return tdict

    # Helper functions
    def print_result(self, tdict, channelid):
        try:
            start = self.config.in_output_tz(tdict['start-time']).strftime('%d %b %H:%M')

        except:
            start = '? ??? ?:??'

        if self.data_output == sys.stdout:
            self.data_output.write('%s: %s' % (channelid, start))
            for k, v in tdict.items():
                if isinstance(v, (str, unicode)):
                    self.data_output.write('        %s: "%s"'.encode('utf-8', 'replace') % (k, v))
                else:
                    self.data_output.write('        %s: %s' % (k, v))

        else:
            self.data_output.write('%s: %s\n' % (channelid, start))
            for k, v in tdict.items():
                if isinstance(v, (str, unicode)):
                    self.data_output.write('        %s: "%s"\n' % (k, v))

                elif isinstance(v, list) and len(v) > 0:
                    vv = '        %s: ' % (k, )
                    for item in v:
                        if isinstance(item, (str, unicode)):
                            vv = '%s"%s"\n                ' % (vv, item)

                        elif k == 'actor' and isinstance(item, dict):
                            vv = '%s%s: "%s"\n                ' % (vv, item['role'], item['name'])

                        else:
                            vv = '%s%s\n                ' % (vv, item)

                    self.data_output.write(vv.rstrip(' '))

                else:
                    self.data_output.write('        %s: %s\n' % (k, v))

    def store_data(self, task, **data):
        '''
        Store any fetched data in the Database
        Optionally ask the database to confirm storing the data
        '''
        if task == 'toggle_alt_url':
            dbdata = {'task':'update', 'toggle_alt_url': {'sourceid': self.proc_id}}

        elif task == 'add_programs':
            dbdata = {'task':'add', 'sourceprograms': data['programs'],
                'laststop': {'laststop': data['laststop'],
                    'sourceid': self.proc_id, 'channelid': data['channelid']},
                'fetcheddays': {'scandate': data['fetcheddays'],
                    'sourceid': self.proc_id, 'channelid': data['channelid']}}

        elif task == 'delete_programs':
            dbdata ={'task':'delete', 'sourceprograms': data}

        else:
            return

        self.config.queues['cache'].put(dbdata)

    def get_cache_return(self, task, **data):
        '''
        Wait for any returned data from the database
        If task is set perform the query
        '''
        if self.quit:
            return -1

        dbdata = {'parent': self, 'task':'query', task: data}
        self.config.queues['cache'].put(dbdata)
        self.state += 8
        value = self.cache_return.get(True)
        self.state -= 8
        if value == 'quit':
            self.ready = True
            return -1

        return value

    def is_data_value(self, searchpath, dtype = None, empty_is_false = True):
        return is_data_value(searchpath, self.source_data, dtype, empty_is_false)

    def data_value(self, searchpath, dtype = None, default = None):
        return data_value(searchpath, self.source_data, dtype, default)

    def get_loaded(self, type='day', channelid = 0, day = None):
        chanlist = list(self.chanids.keys())
        chanlist.append(0)
        offset = self.config.opt_dict['offset']
        if type == 'day':
            if channelid in self.day_loaded.keys():
                if day in self.day_loaded[channelid].keys():
                    return self.day_loaded[channelid][day]

                elif day == 'all' and offset in self.day_loaded[channelid].keys():
                    return self.day_loaded[channelid][offset]

        if type == 'realday':
            if channelid in self.day_present.keys():
                if day in self.day_present[channelid].keys():
                    return self.day_present[channelid][day]

                elif day == 'all' and offset in self.day_loaded[channelid].keys():
                    return self.day_present[channelid][offset]

            return False

        if type == 'channel':
            if channelid in self.channel_loaded.keys():
                return self.channel_loaded[channelid]

            return False

        if type == 'page':
            if channelid in self.page_loaded.keys() and day in self.page_loaded[channelid].keys():
                return self.page_loaded[channelid][day]

            return False

    def set_loaded(self, type='day', channelid = None, day = None, value=True):
        chanlist = list(self.chanids.keys())
        chanlist.append(0)
        offset = self.config.opt_dict['offset']
        daylist = range( offset, (offset + self.config.opt_dict['days']))
        if isinstance(channelid, (list, tuple)):
            chanlist = channelid

        elif channelid not in (None, 'all', 0):
            chanlist = [channelid]

        if type == 'day':
            if isinstance(day, (list, tuple)):
                daylist = day

            elif day not in (None, 'all'):
                daylist = [day]

            for channelid in chanlist:
                if not channelid in self.day_loaded.keys():
                    self.day_loaded[channelid] = {}

                if not channelid in self.day_present.keys():
                    self.day_present[channelid] = {}

                for day in daylist:
                    self.day_loaded[channelid][day] = value
                    self.day_present[channelid][day+self.offset_shift] = value

        if type == 'channel':
            for channelid in chanlist:
                self.channel_loaded[channelid] = value
                if value and channelid in self.all_chanids.keys():
                    for chanid in self.all_chanids[channelid]:
                        self.config.channels[chanid].source_ready(self.proc_id).set()

        if type == 'page':
            if isinstance(day, (list, tuple)):
                pagelist = day

            elif isinstance(day, int):
                pagelist = [day]

            else:
                return

            for channelid in chanlist:
                if channelid in chanlist:
                    if not channelid in self.page_loaded.keys():
                        self.page_loaded[channelid] = {}

                    for day in pagelist:
                        self.page_loaded[channelid][day] = value

    def process_values(self, values):
        tdict = {}
        # Add any known value that does not need further processing
        for k, v in values.items():
            if k in ('video', 'genre', 'subgenre'):
                continue

            if k in self.config.key_values['text'] and not v in (None, ''):
                tdict[k] = v

            elif (k in self.config.key_values['bool'] or k in self.config.key_values['video']) and  isinstance(v, bool):
                tdict[k] = v

            elif k in self.config.key_values['int'] and isinstance(v, int):
                tdict[k] = v

            elif k in self.config.key_values['list'] and isinstance(v, list) and len(v) > 0:
                tdict[k] = v

            elif k in self.config.key_values['timedelta'] and isinstance(v, datetime.timedelta):
                tdict[k] = v

            elif k in self.config.key_values['datetime'] and isinstance(v, datetime.datetime):
                tdict[k] = v

            elif k in self.config.key_values['date'] and isinstance(v, datetime.date):
                tdict[k] = v

            elif k in self.config.roletrans.keys() and isinstance(v, (list, tuple)) and len(v) > 0:
                if not self.config.roletrans[k] in tdict.keys() or len(tdict[self.config.roletrans[k]]) == 0:
                    tdict[self.config.roletrans[k]] = v

                for item in v:
                    if not item in tdict[self.config.roletrans[k]]:
                        tdict[self.config.roletrans[k]].append(item)

            elif k in self.config.credit_keys and isinstance(v, dict):
                for k2, v2 in v.items():
                    if k2 in self.config.roletrans.keys() and isinstance(v2, (list, tuple)) and len(v2) > 0:
                        if not self.config.roletrans[k2] in tdict.keys() or len(tdict[self.config.roletrans[k2]]) == 0:
                            tdict[self.config.roletrans[k2]] = v2

                        for item in v2:
                            if not item in tdict[self.config.roletrans[k2]]:
                                tdict[self.config.roletrans[k2]].append(item)

        if 'genre' in values.keys() or 'subgenre' in values.keys() or 'genres' in values.keys():
            gg = self.get_genre(values)
            tdict['genre'] = gg[0]
            tdict['subgenre'] = gg[1]
            tdict['genres'] =(gg[0], gg[1])
            if len(gg) > 2:
                tdict['org-genre'] = gg[2]

            if len(gg) > 3:
                tdict['org-subgenre'] = gg[3]

        if 'name' in tdict:
            tdict = self.check_title_name(tdict)
        return tdict

    def get_genre(self, values):
        """Sub process for parse_basepage"""
        genre = ''
        subgenre = ''
        if 'genres'in values:
            # It is return as a set of genre/subgenre so we split them
            if isinstance(values['genres'], (str,unicode)):
                values['genre'] = values['genres']

            if isinstance(values['genres'], (list,tuple)):
                if len(values['genres'])> 0:
                    values['genre'] = values['genres'][0]

                if len(values['genres'])> 1:
                    values['subgenre'] = values['genres'][1]

        if self.cattrans_type == 1:
            if self.new_cattrans == None:
                self.new_cattrans = {}

            if 'genre' in values:
                # Just in case it is a comma seperated list
                if isinstance(values['genre'], (str, unicode)):
                    gg = values['genre'].split(',')

                elif isinstance(values['genre'], list):
                    gg = values['genre']

                else:
                    gg = ['']

                gs0 =gg[0].strip()
                gs1 = u''
                gg0 = gs0.lower()
                if len(gg) > 1:
                    gs1 = gg[1].strip()

                elif 'subgenre' in values and values['subgenre'] not in (None, ''):
                    gs1 = values['subgenre'].strip()

                gg1 = gs1.lower()
                if gg0 in self.source_data['cattrans'].keys():
                    if gg1 in self.source_data['cattrans'][gg0].keys():
                        genre = self.source_data['cattrans'][gg0][gg1][0].strip()
                        subgenre = self.source_data['cattrans'][gg0][gg1][1].strip()

                    elif gg1 not in (None, ''):
                        genre = self.source_data['cattrans'][gg0]['default'][0].strip()
                        subgenre = gs1
                        self.new_cattrans[(gg0, gg1)] = (
                                self.source_data['cattrans'][gg0]['default'][0].strip().lower(), gg1)

                    else:
                        genre = self.source_data['cattrans'][gg0]['default'][0].strip()
                        subgenre = self.source_data['cattrans'][gg0]['default'][1].strip()
                        self.new_cattrans[(gg0,gg1)] = (
                                self.source_data['cattrans'][gg0]['default'][0].strip().lower(),
                                self.source_data['cattrans'][gg0]['default'][1].strip().lower())

                elif gg0 not in (None, ''):
                    if gg1 not in (None, ''):
                        self.new_cattrans[(gg0,gg1)] = [self.config.cattrans_unknown.lower().strip(),'']

                    else:
                        self.new_cattrans[(gg0,'')] = [self.config.cattrans_unknown.lower().strip(),'']

                    if self.config.write_info_files:
                        if 'subgenre' in values and values['subgenre'] not in (None, ''):
                            self.config.infofiles.addto_detail_list(u'unknown %s genre/subgenre => ["%s", "%s"]' %
                                (self.source, values['genre'], values['subgenre']))

                        else:
                            self.config.infofiles.addto_detail_list(u'unknown %s genre => %s' %
                                (self.source, values['genre']))

                return (genre, subgenre, gs0, gs1)

        elif self.cattrans_type == 2:
            if self.new_cattrans == None:
                self.new_cattrans = []
            if 'subgenre' in values and values['subgenre'] not in (None, ''):
                if values['subgenre'].lower().strip() in self.source_data['cattrans'].keys():
                    genre = self.source_data['cattrans'][values['subgenre'].lower().strip()]
                    subgenre = values['subgenre'].strip()

                else:
                    for k, v in self.source_data['cattrans_keywords'].items():
                        if k.lower() in values['subgenre'].lower():
                            genre = v.strip()
                            subgenre = values['subgenre'].strip()
                            self.new_cattrans.append((subgenre.lower(), genre.lower()))
                            break

                    else:
                        self.new_cattrans.append((values['subgenre'].lower().strip(),
                                        self.config.cattrans_unknown.lower().strip()))

                    if self.config.write_info_files:
                        self.config.infofiles.addto_detail_list(u'unknown %s subgenre => "%s"' %
                            (self.source, values['subgenre']))

        else:
            if is_data_value('genre', values, str, True):
                genre = values['genre'].strip()

            if is_data_value('subgenre', values, str, True):
                subgenre = values['subgenre'].strip()

        return (genre, subgenre, genre, subgenre)

    def check_title_name(self, values):
        """
        Process Title names on Grouping issues and apply the rename table
        Return the updated Progam dict
        """
        pgroup = ''
        ptitle = data_value('name', values, str, '').strip()
        psubtitle = data_value('episode title', values, str, '').strip()
        if  ptitle == None or ptitle == '':
            return values

        if re.sub('[-,. ]', '', ptitle).lower() == re.sub('[-,. ]', '', psubtitle).lower():
            del(values['episode title'])
            psubtitle = ''

        # Remove a groupname if in the list
        for group in self.config.groupnameremove:
            if (len(ptitle) > len(group) + 3) and (ptitle[0:len(group)].lower() == group):
                p = ptitle.split(':', 1)
                if len(p) >1:
                    self.config.log(self.config.text('fetch', 36,  (group, ptitle)), 64)
                    if self.config.write_info_files:
                        self.config.infofiles.addto_detail_list(unicode('Group removing = \"%s\" from \"%s\"' %
                            (group, ptitle)))

                    ptitle = p[1].strip()

        if ptitle.lower() == psubtitle.lower() and not ('genre'in values and values['genre'] == 'serie/soap'):
            psubtitle = ''

        lent = len(ptitle)
        lenst = len(psubtitle)
        lendif = abs(lent - lenst)
        # Fixing subtitle both named and added to the title
        if  0 < lenst < lent and psubtitle.lower() == ptitle[lendif:].lower().strip():
            ptitle = ptitle[:lendif].strip()
            if (ptitle[-1] == ':') or (ptitle[-1] == '-'):
                ptitle = ptitle[:-1].strip()

        # It also happens that the title is both and the subtitle only the title
        elif 0 < lenst < lent and psubtitle.lower() == ptitle[:lendif].lower():
            p = psubtitle
            psubtitle = ptitle[lendif:].lower().strip()
            ptitle = p
            if (psubtitle[0] == ':') or (psubtitle[0] == '-'):
                psubtitle = psubtitle[1:].strip()

        # And the other way around
        elif  lent < lenst and ptitle.lower() == psubtitle[:lent].lower():
            psubtitle = psubtitle[lent:].strip()
            if (psubtitle[0] == ':') or (psubtitle[0] == '-'):
                psubtitle = psubtitle[1:].strip()

        # exclude certain programs
        if  not (ptitle.lower() in self.config.notitlesplit) and not \
            ('genre' in values and values['genre'].lower() in ['movies','film']):
            # and do the title split test
            p = ptitle.split(':', 1)
            if len(p) >1:
                self.config.log(self.config.text('fetch', 37, (ptitle, )), 64)
                # We for now put the first part in 'group name' to compare with other sources
                pgroup = p[0].strip()
                ptitle = p[1].strip()
                if self.config.write_info_files:
                    self.config.infofiles.addto_detail_list(unicode('Name split = %s + %s' % (pgroup, ptitle)))

        # Check the Title rename list
        if ptitle.lower() in self.config.titlerename:
            self.config.log(self.config.text('fetch', 38, (ptitle, self.config.titlerename[ptitle.lower()])), 64)
            if self.config.write_info_files:
                self.config.infofiles.addto_detail_list(unicode('Title renaming %s to %s\n' %
                    (ptitle, self.config.titlerename[ptitle.lower()])))

            ptitle = self.config.titlerename[ptitle.lower()]

        values['name'] = ptitle
        values['title'] = (pgroup, ptitle, psubtitle)
        if pgroup != '':
            values['group name'] = pgroup

        if psubtitle != '':
            values['episode title'] = psubtitle

        elif 'episode title' in values.keys():
            del(values['episode title'])

        return values

# end FetchData()