#!/usr/bin/env python3
"""
free clonable from https://github.com/ikzelf/zbxdb/
(@) ronald.rood@gmail.com follow @ik_zelf on twitter
follow @zbxdb on twitter
push your added items/checks using git
options: -c/--cfile configFile
         configFile contains config for 1 database and a reference to the checks
-p/--parameter parametername to list parameter from configFile
   where password shows the decrypted form of password_enc
NOTE: a section whose name contains 'discover' is considered to be handled
      as a special case for LLD -> json arrays
NOTE: consider using Oracle Wallet instead of coding credentials in config
NOTE: run as a regular database client, not a special account like root or oracle
"""
import base64
import collections
import configparser
import datetime
import gc
import importlib
import json
import logging.config
import os
import platform
import resource
import socket
import sys
import threading
import time
from argparse import ArgumentParser
# from pdb import set_trace
from timeit import default_timer as timer

import sqlparse

VERSION = "2.07"


def setup_logging(default_path='etc/logging.json',
                  default_level=logging.INFO,
                  env_key='LOG_CFG'):
    """Setup logging configuration.

    Reads a dictConfig json file from default_path, overridable via the
    LOG_CFG environment variable.  Returns the path of the config file
    actually used, or False when falling back to basicConfig.
    """
    path = default_path
    value = os.getenv(env_key, None)
    if value:
        path = value
    if os.path.exists(path):
        with open(path, 'rt') as _f:
            config = json.load(_f)
        try:
            logging.config.dictConfig(config)
        except ValueError:
            print("Error during reading log configuration")
            print(config)
            print("Does the path for filename exist?")
            raise
        return path
    print("Falling back to default logging config")
    logging.basicConfig(level=default_level)
    return False


def set_logfile(_l, _file):
    """Change the filename of every FileHandler of the root logger.

    Keeps the original directory, replaces only the basename so each
    monitored config gets its own logfile.
    """
    for _h in _l.root.handlers:
        if isinstance(_h, logging.FileHandler):
            _h.baseFilename = os.path.join(
                os.path.dirname(_h.baseFilename), os.path.basename(_file))
            _l.info("Continue logging in %s", _h.baseFilename)
            # close so the next emit reopens under the new name
            _h.close()


def to_outfile(_c, ikey, values):
    """Uniform way to generate the output for items.

    Writes one zabbix_sender line (hostname "key" timestamp value) to the
    out_file kept open in _c['OUTF'].  Reopens the file in append mode when
    it exists, truncates when it was removed (e.g. consumed by the sender).
    """
    timestamp = int(time.time())
    if os.path.exists(_c['out_file']):
        if not _c['OUTF']:
            _c['OUTF'] = open(_c['out_file'], "a")
    else:
        # file vanished: close the stale handle (if any) before truncating,
        # otherwise every call leaks a file descriptor
        if _c['OUTF']:
            _c['OUTF'].close()
        _c['OUTF'] = open(_c['out_file'], "w")
    try:
        _c['OUTF'].write(_c['hostname'] + ' "' + ikey + '" ' +
                         str(timestamp) + ' ' + str(values) + '\n')
    except TypeError:
        LOGGER.error("%s TypeError in sql %s from section %s\n",
                     _c['ME'], _c['key'], _c['section'])
        _c['OUTF'].write(_c['hostname'] + " query[" +
                         _c['section'] + "," + _c['key'] + ",status] " +
                         str(timestamp) + " " + "TypeError" + "\n")
    _c['OUTF'].flush()


class MyConfigParser(configparser.RawConfigParser):
    # pylint: disable=too-many-ancestors
    """RawConfigParser that strips inline comments (# and ;)."""

    def __init__(self):
        configparser.RawConfigParser.__init__(
            self, inline_comment_prefixes=('#', ';'))


def encrypted(plain):
    """Encrypt (base64-obfuscate) a plaintext password."""
    return base64.b64encode(bytes(plain, 'utf-8'))


def decrypted(pw_enc):
    """Return the decrypted (base64-decoded) password."""
    return base64.b64decode(pw_enc).decode("utf-8", "ignore")


def get_config_par(_c, _parameter, _me):
    """Get a parameter value from section _me, '' when absent.

    Environment variables in the value are expanded.
    """
    try:
        _v = os.path.expandvars(_c.get(_me, _parameter))
    except configparser.NoOptionError:
        _v = ""
    return _v


def get_config(filename, _me):
    """Read the specified configuration file and return a config dict.

    Side effect: when a plaintext password is present it is encrypted into
    password_enc and removed from the file on disk.
    Raises ValueError when the file does not exist.
    """
    # defaults for every supported parameter
    config = {'db_url': "", 'db_type': "", 'db_driver': "",
              'instance_type': "rdbms",
              'server': "", 'db_name': "", 'instance_name': "",
              'server_port': "", 'username': "scott", 'password': "",
              'role': "normal", 'omode': 0,
              'out_dir': "", 'out_file': "", 'hostname': "",
              'checks_dir': "", 'site_checks': "",
              'password_enc': "", 'OUTF': 0, 'ME': _me,
              'cafile': "",
              'section': "", 'key': "", 'sqltimeout': 60.0}
    _config = MyConfigParser()
    if not os.path.exists(filename):
        raise ValueError("Configfile " + filename + " does not exist")

    with open(filename, 'r') as _inif:
        _config.read_file(_inif)
    for _i in config:
        _v = get_config_par(_config, _i, _me)
        if _v:
            config[_i] = _v
    enc = config['password_enc']
    config['password_enc'] = bytearray(enc, 'utf-8')
    config['out_file'] = os.path.join(
        config['out_dir'],
        str(os.path.splitext(os.path.basename(filename))[0]) + ".zbx")
    if config['site_checks'] == "NONE":
        config['site_checks'] = ""
    _f = config['sqltimeout']
    config['sqltimeout'] = float(_f)
    if config['password'] != "":
        # move the plaintext password into password_enc and wipe it from disk
        enc = encrypted(config['password'])
        with open(filename, 'w') as _inif:
            _config.set(_me, 'password', '')
            _config.set(_me, 'password_enc', enc.decode())
            _config.write(_inif)
    return config


def cancel_sql(_c, _s, _k):
    """Cancel long running SQL (called from a threading.Timer)."""
    LOGGER.warning("%s cancel_sql %s %s\n", ME, _s, _k)
    _c.cancel()
    LOGGER.warning("%s canceled %s %s\n", ME, _s, _k)
    # raise zbx_exception("sql_timeout")


def load_driver(_c):
    """Dynamically load the configured db driver module."""
    try:
        _db_driver = __import__(_c['db_driver'])
        LOGGER.info(_db_driver)
    except ImportError:
        LOGGER.critical("%s supported will be oracle(cx_Oracle), postgres(psycopg2), "
                        "mysql(mysql.connector), mssql(pymssql/_mssql), db2(ibm_db_dbi)\n",
                        _c['ME'])
        LOGGER.critical(
            "%s tested are oracle(cx_Oracle), postgres(psycopg2)\n", _c['ME'])
        LOGGER.critical(
            "Don't forget to install the drivers first ...\n", exc_info=True)
        raise
    LOGGER.info("%s driver %s loaded\n", ME, _c['db_driver'])
    return _db_driver


def load_driver_errors(_c):
    """Dynamically load the driver errors handler module."""
    try:
        driver_errors = importlib.import_module(
            "drivererrors." + _c['db_driver'])
    except ImportError:
        LOGGER.critical("Failed to load driver error routines\n")
        LOGGER.critical("Looked in %s\n", sys.path, exc_info=True)
        raise
    LOGGER.info("%s driver drivererrors for %s loaded\n",
                ME, _c['db_driver'])
    return driver_errors


def load_db_connections(_c):
    """Dynamically load the db connection tools module."""
    try:
        db_connections = importlib.import_module(
            "dbconnections." + _c['db_type'])
    except ImportError:
        LOGGER.critical(
            "Failed to load dbconnections routines for %s\n", _c['db_type'])
        LOGGER.critical("Looked in %s\n", sys.path, exc_info=True)
        raise
    LOGGER.info("%s dbconnections for %s loaded\n", ME, _c['db_type'])
    return db_connections


def main():
    # pylint: disable=too-many-statements,too-many-branches,too-many-locals
    """main routine: connect, load checks, run them forever.

    Outer loop (re)connects to the database; inner loop runs the checks
    once a minute and restarts the whole script when the script, a module
    or the logging config changed on disk.
    """
    if int(platform.python_version().split('.')[0]) < 3:
        LOGGER.fatal("%s needs at least python version 3, currently %s",
                     ME, platform.python_version())
        sys.exit(1)
    start_time = int(time.time())
    _parser = ArgumentParser()
    _parser.add_argument("-c", "--cfile", dest="configfile",
                         default=ME + ".cfg",
                         help="Configuration file", metavar="FILE",
                         required=True)
    _parser.add_argument("-v", "--verbosity", action="count", default=0,
                         help="increase output verbosity overriding the default")
    _parser.add_argument("-p", "--parameter", action="store",
                         help="show parameter from configfile")
    _args = _parser.parse_args()

    set_logfile(LOGGER, _args.configfile + ".log")
    _config = get_config(_args.configfile, ME)

    if _args.parameter:
        if _args.parameter == 'password':
            print('parameter {}: {}\n'.format(
                _args.parameter,
                decrypted(_config[_args.parameter + '_enc'])))
        else:
            print('parameter {}: {}\n'.format(
                _args.parameter, _config[_args.parameter]))
        sys.exit(0)

    if _args.verbosity:
        # every -v lowers the level by 10 (one severity step)
        new_level = logging.getLogger().getEffectiveLevel() - (_args.verbosity * 10)
        if new_level < 0:
            new_level = 0
        LOGGER.warning("Changing loglevel from %d to %d",
                       logging.getLogger().getEffectiveLevel(), new_level)
        logging.getLogger().setLevel(new_level)
    LOGGER.debug("log level %d", logging.getLogger().getEffectiveLevel())

    LOGGER.warning("start python-%s %s-%s pid=%s Connecting ...\n",
                   platform.python_version(), ME, VERSION, os.getpid())
    if _config['password']:
        LOGGER.warning(
            "first encrypted the plaintext password and removed from config\n")
    # we need the password ....
    _config['password'] = decrypted(_config['password_enc'])
    # add a few seconds extra to allow the driver timeout handling to do the it's job.
    # for example, cx_oracle has a cancel routine that we call after a timeout. If
    # there is a network problem, the cancel gets a ORA-12152: TNS:unable to send break message
    # setting this defaulttimeout should speed this up
    socket.setdefaulttimeout(_config['sqltimeout'] + 3)
    LOGGER.warning("%s found db_type=%s, driver %s; checking for driver\n",
                   ME, _config['db_type'], _config['db_driver'])
    if not os.path.exists(
            os.path.join(_config['checks_dir'], _config['db_type'])):
        raise ValueError("db_type " + _config['db_type'] +
                         " does not exist in the " + _config['checks_dir'] +
                         " directory")
    db_driver = load_driver(_config)
    driver_errors = load_driver_errors(_config)
    db_connections = load_db_connections(_config)
    LOGGER.info(db_connections)
    LOGGER.info(driver_errors)
    LOGGER.info("hostname in zabbix: %s", _config['hostname'])
    # hide password, hoping username != password ;-)
    LOGGER.info("connect string : %s\n",
                db_connections.connect_string(_config).replace(
                    _config['password'], '******'))
    LOGGER.info('using sql_timeout : %ds\n', _config['sqltimeout'])
    LOGGER.info("out_file : %s\n", _config['out_file'])
    if _config['site_checks']:
        LOGGER.info("site_checks : %s\n", _config['site_checks'])

    # the first sys_files entries of check_files are the script itself, the
    # loaded modules and (optionally) the logging config; a change of any of
    # them restarts the script.  LOG_CONF may be False (basicConfig fallback),
    # in which case it must NOT be watched (os.path.getmtime would raise).
    check_files = [{'name': __file__,
                    'lmod': os.path.getmtime(__file__)},
                   {'name': db_connections.__file__,
                    'lmod': os.path.getmtime(db_connections.__file__)},
                   {'name': driver_errors.__file__,
                    'lmod': os.path.getmtime(driver_errors.__file__)}]
    if LOG_CONF:
        check_files.append(
            {'name': LOG_CONF, 'lmod': os.path.getmtime(LOG_CONF)})
    sys_files = len(check_files)

    for i in range(sys_files):
        to_outfile(_config, "{}[checks,{},name]".format(ME, i),
                   check_files[i]['name'])
        to_outfile(_config, "{}[checks,{},lmod]".format(ME, i),
                   int(check_files[i]['lmod']))

    conn_counter = 0
    conn_errors = 0
    query_counter = 0
    query_errors = 0

    sleep_c = 0
    sleep_s = 1
    prev_err = 0

    while True:
        try:
            for i in range(sys_files):
                if check_files[i]['lmod'] != os.stat(check_files[i]['name']).st_mtime:
                    LOGGER.warning("%s Changed, from %s to %s restarting ..\n",
                                   check_files[i]['name'],
                                   time.ctime(check_files[i]['lmod']),
                                   time.ctime(os.path.getmtime(check_files[i]['name'])))
                    os.execv(__file__, sys.argv)
            # reset list in case of a just new connection that reloads the config
            check_files = [{'name': __file__,
                            'lmod': os.path.getmtime(__file__)},
                           {'name': db_connections.__file__,
                            'lmod': os.path.getmtime(db_connections.__file__)},
                           {'name': driver_errors.__file__,
                            'lmod': os.path.getmtime(driver_errors.__file__)}]
            if LOG_CONF:
                check_files.append(
                    {'name': LOG_CONF, 'lmod': os.path.getmtime(LOG_CONF)})
            _config = get_config(_args.configfile, ME)
            _config['password'] = decrypted(_config['password_enc'])
            _start = timer()
            # hide password, hoping username != password ;-)
            LOGGER.info('connecting to %s\n',
                        db_connections.connect_string(_config).replace(
                            _config['password'], '******'))
            conn_has_cancel = False
            _conn = db_connections.connect(db_driver, _config)
            if "cancel" in dir(_conn):
                conn_has_cancel = True
            LOGGER.info(_conn)
            conn_counter += 1
            _cursor = _conn.cursor()
            connect_info = db_connections.connection_info(_conn)
            LOGGER.info('connected db_url %s type %s db_role %s version %s\n'
                        '%s user %s %s sid,serial %d,%d instance %s as %s cancel:%s\n',
                        _config['db_url'], connect_info['instance_type'],
                        connect_info['db_role'],
                        connect_info['dbversion'],
                        datetime.datetime.fromtimestamp(time.time()),
                        _config['username'], connect_info['uname'],
                        connect_info['sid'],
                        connect_info['serial'],
                        connect_info['iname'],
                        _config['role'], conn_has_cancel)
            # the checks file is selected on db_role and version
            if connect_info['db_role'] in ["PHYSICAL STANDBY", "SLAVE"]:
                checks_file = os.path.join(
                    _config['checks_dir'], _config['db_type'],
                    "standby" + "." + connect_info['dbversion'] + ".cfg")
            else:
                checks_file = os.path.join(
                    _config['checks_dir'], _config['db_type'],
                    connect_info['db_role'].lower() + "." +
                    connect_info['dbversion'] + ".cfg")

            _files = [checks_file]
            check_files.append({'name': checks_file, 'lmod': 0})
            if _config['site_checks']:
                for addition in _config['site_checks'].split(","):
                    addfile = os.path.join(_config['checks_dir'],
                                           _config['db_type'],
                                           addition + ".cfg")
                    check_files.append({'name': addfile, 'lmod': 0})
                    _files.append(addfile)
            LOGGER.info('using checks from %s\n', _files)

            for _cf in check_files:
                if not os.path.exists(_cf['name']):
                    raise ValueError(
                        "Configfile " + _cf['name'] + " does not exist")
            # all checkfiles exist

            sleep_c = 0
            sleep_s = 1
            prev_err = 0
            con_mins = 0
            open_time = int(time.time())

            while True:
                if not os.path.exists(_args.configfile):
                    LOGGER.warning("Config file (%s) gone ... time to quit",
                                   _args.configfile)
                    sys.exit(0)
                LOGGER.debug("%s while True\n", ME)
                if connect_info['db_role'] != db_connections.current_role(
                        _conn, connect_info):
                    LOGGER.info("db_role changed from %s to %s",
                                connect_info['db_role'],
                                db_connections.current_role(_conn, connect_info))
                    # re connect to get the correct monitoring config again
                    break
                # keep this to compare for when to dump stats
                now_run = int(time.time())
                run_timer = timer()  # keep this to compare for when to dump stats
                # loading checks from the various checkfiles:
                need_to_load = "no"
                # pylint: disable=consider-using-enumerate
                for i in range(len(check_files)):
                    # at 0 - sys_files is the script itself
                    try:
                        current_lmod = os.path.getmtime(check_files[i]['name'])
                    except OSError as _e:
                        LOGGER.warning("%s: %s\n",
                                       check_files[i]['name'], _e.strerror)
                        # ignore the error, maybe temporary due to an update
                        current_lmod = check_files[i]['lmod']
                    if check_files[i]['lmod'] != current_lmod:
                        if i < sys_files:
                            # it is the script, a module or LOG_CONF that changed
                            LOGGER.warning("%s changed, from %s to %s restarting ...\n",
                                           check_files[i]['name'],
                                           time.ctime(check_files[i]['lmod']),
                                           time.ctime(current_lmod))
                            os.execv(__file__, sys.argv)
                        else:
                            if check_files[i]['lmod'] == 0:
                                LOGGER.info("checks loading %s\n",
                                            check_files[i]['name'])
                                need_to_load = "yes"
                            else:
                                LOGGER.warning("checks changed, reloading %s\n",
                                               check_files[i]['name'])
                                need_to_load = "yes"

                if need_to_load == "yes":
                    to_outfile(_config, ME + "[version]", VERSION)
                    to_outfile(
                        _config, ME + "[config,db_type]", _config['db_type'])
                    to_outfile(
                        _config, ME + "[config,db_driver]", _config['db_driver'])
                    to_outfile(
                        _config, ME + "[config,instance_type]",
                        _config['instance_type'])
                    to_outfile(_config, ME + "[conn,db_role]",
                               connect_info['db_role'])
                    to_outfile(
                        _config, ME + "[conn,instance_type]",
                        connect_info['instance_type'])
                    to_outfile(_config, ME + "[conn,dbversion]",
                               connect_info['dbversion'])
                    to_outfile(
                        _config, ME + "[connect,instance_name]",
                        connect_info['iname'])
                    # sometimes the instance_name query follows within a second
                    # missing event so give it some more time
                    time.sleep(3)
                    objects_list = []
                    sections_list = []
                    file_list = []
                    all_checks = []
                    for i in range(len(check_files)):
                        _e = collections.OrderedDict()
                        _e = {"{#CHECKS_FILE}": i}
                        file_list.append(_e)
                    files_json = '{\"data\":' + json.dumps(file_list) + '}'
                    to_outfile(_config, ME + ".files.lld", files_json)
                    for i in range(sys_files, len(check_files)):
                        # #0 is executable that is also checked for updates
                        # #1 db_connections module
                        # #2 driver_errors module
                        # #3 LOG_CONF if it exists ...
                        # so, skip those and pick the real check_files
                        _checks = configparser.RawConfigParser()
                        try:
                            check_file = open(check_files[i]['name'], 'r')
                            to_outfile(_config, "{}[checks,{},name]".format(ME, i),
                                       check_files[i]['name'])
                            to_outfile(_config, "{}[checks,{},lmod]".format(ME, i),
                                       str(int(os.stat(check_files[i]['name']).st_mtime)))
                            try:
                                _checks.read_file(check_file)
                                check_file.close()
                                to_outfile(_config, ME + "[checks," + str(i) +
                                           ",status]", 0)
                            except configparser.Error:
                                to_outfile(_config, ME + "[checks," + str(i) +
                                           ",status]", 13)
                                LOGGER.critical("file %s has parsing errors ->(13)\n",
                                                check_files[i]['name'])
                        except IOError as io_error:
                            to_outfile(
                                _config, ME + "[checks," + str(i) + ",status]", 11)
                            LOGGER.critical("file %s IOError %s %s ->(11)\n",
                                            check_files[i]['name'],
                                            io_error.errno, io_error.strerror)

                        check_files[i]['lmod'] = os.stat(
                            check_files[i]['name']).st_mtime
                        all_checks.append(_checks)
                        for section in sorted(_checks.sections()):
                            sec_mins = int(_checks.get(section, "minutes"))
                            if sec_mins == 0:
                                LOGGER.info(
                                    "%s run at connect only\n", section)
                            else:
                                LOGGER.info("%s run every %d minutes\n",
                                            section, sec_mins)
                            # dump own discovery items of the queries per section
                            _e = collections.OrderedDict()
                            _e = {"{#SECTION}": section}
                            sections_list.append(_e)
                            _x = dict(_checks.items(section))
                            for key, sqls in sorted(_x.items()):
                                if sqls and key != "minutes":
                                    _d = collections.OrderedDict()
                                    _d = {"{#SECTION}": section, "{#KEY}": key}
                                    objects_list.append(_d)
                                    LOGGER.info("%s: %s\n", key,
                                                sqls[0:60].replace('\n', ' ')
                                                .replace('\r', ' '))
                    # checks are loaded now.
                    sections_json = '{\"data\":' + \
                        json.dumps(sections_list) + '}'
                    LOGGER.debug("lld key: %s json: %s\n",
                                 ME + ".lld", sections_json)
                    to_outfile(_config, ME + ".section.lld", sections_json)
                    rows_json = '{\"data\":' + json.dumps(objects_list) + '}'
                    LOGGER.debug("lld key: %s json: %s\n",
                                 ME + ".lld", rows_json)
                    to_outfile(_config, ME + ".query.lld", rows_json)
                    # sqls can contain multiple statements per key. sqlparse to split them
                    # now. Otherwise use a lot of extra cycles when splitting at runtime
                    # all_sql { {section, key}: statements }
                    all_sql = {}
                    for _checks in all_checks:
                        for section in sorted(_checks.sections()):
                            _x = dict(_checks.items(section))
                            for key, sqls in sorted(_x.items()):
                                if sqls and key != "minutes":
                                    all_sql[(section, key)] = []
                                    for statement in sqlparse.split(sqls):
                                        all_sql[(section, key)].append(
                                            statement)

                # checks discovery is also printed
                #
                to_outfile(_config, ME + "[uptime]",
                           int(time.time() - start_time))
                to_outfile(_config, ME + "[opentime]",
                           int(time.time() - open_time))
                # the connect status is only real if executed a query ....
                for _checks in all_checks:
                    for section in sorted(_checks.sections()):
                        section_timer = timer()  # keep this to compare for when to dump stats
                        sec_mins = int(_checks.get(section, "minutes"))
                        if ((con_mins == 0 and sec_mins == 0) or
                                (sec_mins > 0 and con_mins % sec_mins == 0)):
                            # time to run the checks again from this section
                            _x = dict(_checks.items(section))
                            _cursor = _conn.cursor()
                            for key, sqls in sorted(_x.items()):
                                if sqls and key != "minutes":
                                    LOGGER.debug("%s section: %s key: %s\n",
                                                 ME, section, key)
                                    try:
                                        query_counter += 1
                                        if conn_has_cancel:
                                            # pymysql has no cancel but does have
                                            # timeout in connect
                                            sqltimeout = threading.Timer(
                                                _config['sqltimeout'],
                                                cancel_sql, [_conn, section, key])
                                            sqltimeout.start()
                                        _start = timer()
                                        for statement in all_sql[(section, key)]:
                                            LOGGER.debug("%s section: %s key: %s sql: %s\n",
                                                         ME, section, key, statement)
                                            _cursor.execute(statement)
                                        startf = timer()
                                        # output for the last query must include the
                                        # complete key and value
                                        # output for the preparing queries is ignored
                                        rows = _cursor.fetchall()
                                        if conn_has_cancel:
                                            sqltimeout.cancel()
                                        if "discover" in section:
                                            objects_list = []
                                            for row in rows:
                                                _d = collections.OrderedDict()
                                                for col in range(len(_cursor.description)):
                                                    _d[_cursor.description[col]
                                                       [0]] = row[col]
                                                objects_list.append(_d)
                                            rows_json = '{\"data\":' + \
                                                json.dumps(objects_list) + '}'
                                            LOGGER.debug("DEBUG lld key: %s json: %s\n",
                                                         key, rows_json)
                                            to_outfile(_config, key, rows_json)
                                            to_outfile(_config, ME + "[query," +
                                                       section + "," + key +
                                                       ",status]", 0)
                                        else:
                                            if rows and len(rows[0]) == 2:
                                                _config['section'] = section
                                                _config['key'] = key
                                                for row in rows:
                                                    to_outfile(
                                                        _config, row[0], row[1])
                                                to_outfile(_config, ME + "[query," +
                                                           section + "," + key +
                                                           ",status]", 0)
                                            elif not rows:
                                                to_outfile(_config, ME + "[query," +
                                                           section + "," + key +
                                                           ",status]", 0)
                                            else:
                                                LOGGER.critical('key=%s.%s ZBXDB-%d: '
                                                                'SQL format error: %s\n',
                                                                section, key, 2,
                                                                "expect key,value pairs")
                                                to_outfile(_config, ME + "[query," +
                                                           section + "," + key +
                                                           ",status]", 2)
                                        _config['section'] = ""
                                        _config['key'] = ""
                                        fetchela = timer() - startf
                                        elapsed_s = timer() - _start
                                        to_outfile(_config, ME + "[query," +
                                                   section + "," + key + ",ela]",
                                                   elapsed_s)
                                        to_outfile(_config, ME + "[query," +
                                                   section + "," + key + ",fetch]",
                                                   fetchela)
                                    # except (db_driver.DatabaseError,
                                    #         socket.timeout) as dberr:
                                    except Exception as dberr:
                                        if conn_has_cancel:
                                            sqltimeout.cancel()
                                        ecode, emsg = driver_errors.db_errorcode(
                                            db_driver, dberr)
                                        elapsed_s = timer() - _start
                                        query_errors += 1
                                        to_outfile(_config, ME + "[query," +
                                                   section + "," + key + ",status]",
                                                   ecode)
                                        to_outfile(_config, ME + "[query," +
                                                   section + "," + key + ",ela]",
                                                   elapsed_s)
                                        LOGGER.info('key=%s.%s ZBXDB-%s: '
                                                    'Db execution error: %s\n',
                                                    section, key, ecode,
                                                    emsg.strip())
                                        if driver_errors.db_error_needs_new_session(
                                                db_driver, ecode):
                                            raise
                            LOGGER.debug("%s commit\n", ME)
                            _conn.commit()
                            LOGGER.debug("%s committed\n", ME)
                        # end of a section ## time to run the checks again from this section
                        to_outfile(_config, ME + "[query," + section + ",,ela]",
                                   timer() - section_timer)
                # release locks that might have been taken
                LOGGER.debug("%s commit 2\n", ME)
                _conn.commit()
                LOGGER.debug("%s committed.\n", ME)
                # dump metric for summed elapsed time of this run
                to_outfile(_config, ME + "[query,,,ela]",
                           timer() - run_timer)
                to_outfile(_config, ME + "[cpu,user]",
                           resource.getrusage(resource.RUSAGE_SELF).ru_utime)
                to_outfile(_config, ME + "[cpu,sys]",
                           resource.getrusage(resource.RUSAGE_SELF).ru_stime)
                to_outfile(_config, ME + "[mem,maxrss]",
                           resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                # passed all sections
                to_outfile(_config, ME + "[connect,status]", 0)
                if ((now_run - start_time) % 3600) == 0:
                    gc.collect()
                    # dump stats
                    LOGGER.info("connect %d times, %d fail; started %d queries, "
                                "%d fail memrss:%d user:%f sys:%f\n",
                                conn_counter, conn_errors,
                                query_counter, query_errors,
                                resource.getrusage(
                                    resource.RUSAGE_SELF).ru_maxrss,
                                resource.getrusage(
                                    resource.RUSAGE_SELF).ru_utime,
                                resource.getrusage(resource.RUSAGE_SELF).ru_stime)
                # try to keep activities on the same starting second:
                sleep_time = 60 - ((int(time.time()) - start_time) % 60)
                LOGGER.debug("Sleeping for %d seconds\n", sleep_time)
                time.sleep(sleep_time)
                con_mins = con_mins + 1  # not really mins since the checks could
                #                          have taken longer than 1 minute to complete
            # end while True
        # except (db_driver.DatabaseError, socket.timeout, ConnectionResetError) as dberr:
        except Exception as dberr:
            err_code, err_msg = driver_errors.db_errorcode(db_driver, dberr)
            elapsed_s = timer() - _start
            if err_code == 0:
                # something fishy happened .... driver problems?
                to_outfile(_config, ME + "[connect,status]", err_msg[:200])
            else:
                to_outfile(_config, ME + "[connect,status]", err_code)
            if not driver_errors.db_error_needs_new_session(db_driver, err_code):
                # from a killed session, crashed instance or similar
                conn_errors += 1
            if prev_err != err_code:
                sleep_c = 0
                sleep_s = 1
                prev_err = err_code
            sleep_c += 1
            if sleep_c >= 10:
                if sleep_s <= 301:
                    # don't sleep longer than 5 mins after connect failures
                    sleep_s += 10
                sleep_c = 0
            LOGGER.error('(%d.%d)connection error: [%s] %s for %s@%s\n',
                         sleep_c, sleep_s, err_code,
                         err_msg.strip().replace('\n', ' ').replace('\r', ' '),
                         _config['username'], _config['db_url'])
            # uptime is used for nodata item .... so also give when not
            # connected
            to_outfile(_config, ME + "[uptime]",
                       int(time.time() - start_time))
            # set_trace()
            time.sleep(sleep_s)
        except (KeyboardInterrupt, SystemExit):
            # use sys.exit: the site-provided exit() is not guaranteed to exist
            sys.exit(0)


ME = os.path.splitext(os.path.basename(__file__))[0]
LOG_CONF = setup_logging()
LOGGER = logging.getLogger(__name__)

if __name__ == '__main__':
    try:
        main()
    except Exception:  # pylint: disable=broad-except
        LOGGER.fatal("problem", exc_info=True)