Python logging.getLogger() Examples

The following are 30 code examples showing how to use logging.getLogger(). They are extracted from open source projects; the original project, author, source file, and license are noted above each example.

You may also want to check out all available functions and classes of the logging module.
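Before the examples, here is a minimal sketch of the basic logging.getLogger() pattern: fetch (or create) a named logger, attach a handler and formatter, and log through it. The logger name "myapp.worker" and the format string are illustrative placeholders, not taken from any of the projects below.

import logging

# Get (or create) a logger; the name "myapp.worker" is only an example.
logger = logging.getLogger("myapp.worker")
logger.setLevel(logging.DEBUG)

# Attach a console handler with a simple format.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(handler)

logger.info("Logger %s is ready", logger.name)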

Example 1
Project: incubator-spot   Author: apache   File: utilities.py   License: Apache License 2.0
def get_logger(cls,logger_name,create_file=False):

        # create logger for prd_ci
        log = logging.getLogger(logger_name)
        log.setLevel(level=logging.INFO)

        # create formatter and add it to the handlers
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

        if create_file:
                # create file handler for logger.
                fh = logging.FileHandler('SPOT.log')
                fh.setLevel(level=logging.DEBUG)
                fh.setFormatter(formatter)
        # create console handler for logger.
        ch = logging.StreamHandler()
        ch.setLevel(level=logging.DEBUG)
        ch.setFormatter(formatter)

        # add handlers to logger.
        if create_file:
            log.addHandler(fh)

        log.addHandler(ch)
        return log
Example 2
Project: mlbv   Author: kmac   File: util.py   License: GNU General Public License v3.0
def init_logging(log_file=None, append=False, console_loglevel=logging.INFO):
    """Set up logging to file and console."""
    if log_file is not None:
        if append:
            filemode_val = 'a'
        else:
            filemode_val = 'w'
        logging.basicConfig(level=logging.DEBUG,
                            format="%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s",
                            # datefmt='%m-%d %H:%M',
                            filename=log_file,
                            filemode=filemode_val)
    # define a Handler which writes messages at console_loglevel or higher to sys.stderr
    console = logging.StreamHandler()
    console.setLevel(console_loglevel)
    # set a format which is simpler for console use
    formatter = logging.Formatter("%(message)s")
    console.setFormatter(formatter)
    # add the handler to the root logger
    logging.getLogger('').addHandler(console)
    global LOG
    LOG = logging.getLogger(__name__) 
Example 3
Project: drydock   Author: airshipit   File: conftest.py   License: Apache License 2.0
def setup_logging():
    # Setup root logger
    logger = logging.getLogger('drydock')
    logger.setLevel('DEBUG')
    ch = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s - %(message)s'
    )
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    # Specialized format for API logging
    logger = logging.getLogger('drydock.control')
    logger.propagate = False
    formatter = logging.Formatter(
        "%(asctime)s - %(levelname)s - %(user)s - %(req_id)s"
        " - %(external_ctx)s - %(end_user)s - %(message)s"
    )

    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    logger.addHandler(ch) 
Example 4
Project: incubator-spot   Author: apache   File: processing.py   License: Apache License 2.0
def convert(netflow, tmpdir, opts='', prefix=None):
    '''
        Convert `nfcapd` file to a comma-separated output format.

    :param netflow : Path of binary file.
    :param tmpdir  : Path of local staging area.
    :param opts    : A set of options for `nfdump` command.
    :param prefix  : If `prefix` is specified, the file name will begin with that;
                     otherwise, a default `prefix` is used.
    :returns       : Path of CSV-converted file.
    :rtype         : ``str``
    :raises OSError: If an error occurs while executing the `nfdump` command.
    '''
    logger = logging.getLogger('SPOT.INGEST.FLOW.PROCESS')

    with tempfile.NamedTemporaryFile(prefix=prefix, dir=tmpdir, delete=False) as fp:
        command = COMMAND.format(netflow, opts, fp.name)

        logger.debug('Execute command: {0}'.format(command))
        Util.popen(command, raises=True)

        return fp.name 
Example 5
Project: incubator-spot   Author: apache   File: processing.py   License: Apache License 2.0
def convert(logfile, tmpdir, opts='', prefix=None):
    '''
        Copy log file to the local staging area.

    :param logfile: Path of log file.
    :param tmpdir : Path of local staging area.
    :param opts   : A set of options for the `cp` command.
    :param prefix : If `prefix` is specified, the file name will begin with that;
                     otherwise, a default `prefix` is used.
    :returns      : Path of log file in local staging area.
    :rtype        : ``str``
    '''
    logger = logging.getLogger('SPOT.INGEST.PROXY.PROCESS')

    with tempfile.NamedTemporaryFile(prefix=prefix, dir=tmpdir, delete=False) as fp:
        command = COMMAND.format(opts, logfile, fp.name)

        logger.debug('Execute command: {0}'.format(command))
        Util.popen(command, raises=True)

        return fp.name 
Example 6
Project: incubator-spot   Author: apache   File: collector.py   License: Apache License 2.0
def ingest_file(file,message_size,topic,kafka_servers):
    
    logger = logging.getLogger('SPOT.INGEST.PROXY.{0}'.format(os.getpid()))
    try:        
        message = ""
        logger.info("Ingesting file: {0} process:{1}".format(file,os.getpid())) 
        with open(file,"rb") as f:
            for line in f:
                message += line
                if len(message) > message_size:
                    KafkaProducer.SendMessage(message, kafka_servers, topic, 0)
                    message = ""
            # send the last message.
            KafkaProducer.SendMessage(message, kafka_servers, topic, 0)
        rm_file = "rm {0}".format(file)
        Util.execute_cmd(rm_file,logger)
        logger.info("File {0} has been successfully sent to Kafka Topic: {1}".format(file,topic))

    except Exception as err:        
        logger.error("There was a problem, please check the following error message:{0}".format(err.message))
        logger.error("Exception: {0}".format(err)) 
Example 7
Project: incubator-spot   Author: apache   File: processing.py   License: Apache License 2.0
def convert(pcap, tmpdir, opts='', prefix=None):
    '''
        Convert `pcap` file to a comma-separated output format.

    :param pcap    : Path of binary file.
    :param tmpdir  : Path of local staging area.
    :param opts    : A set of options for `tshark` command.
    :param prefix  : If `prefix` is specified, the file name will begin with that;
                     otherwise, a default `prefix` is used.
    :returns       : Path of CSV-converted file.
    :rtype         : ``str``
    :raises OSError: If an error occurs while executing the `tshark` command.
    '''
    logger = logging.getLogger('SPOT.INGEST.DNS.PROCESS')

    with tempfile.NamedTemporaryFile(prefix=prefix, dir=tmpdir, delete=False) as fp:
        command = COMMAND.format(pcap, opts, fp.name)

        logger.debug('Execute command: {0}'.format(command))
        Util.popen(command, raises=True)

        return fp.name 
Example 8
Project: incubator-spot   Author: apache   File: utils.py   License: Apache License 2.0
def call(cls, cmd, shell=False):
        '''
            Run command with arguments, wait to complete and return ``True`` on success.

        :param cls: The class as implicit first argument.
        :param cmd: Command string to be executed.
        :returns  : ``True`` on success, otherwise ``None``.
        :rtype    : ``bool``
        '''
        logger = logging.getLogger('SPOT.INGEST.COMMON.UTIL')
        logger.debug('Execute command: {0}'.format(cmd))

        try:
            subprocess.call(cmd, shell=shell)
            return True

        except Exception as exc:
            logger.error('[{0}] {1}'.format(exc.__class__.__name__, exc.message)) 
Example 9
Project: incubator-spot   Author: apache   File: kafka_client.py   License: Apache License 2.0
def _initialize_members(self, topic, server, port, zk_server, zk_port, partitions):

        # get logger instance
        self._logger = logging.getLogger("SPOT.INGEST.KafkaProducer")

        # kafka requirements
        self._server = server
        self._port = port
        self._zk_server = zk_server
        self._zk_port = zk_port
        self._topic = topic
        self._num_of_partitions = partitions
        self._partitions = []
        self._partitioner = None
        self._kafka_brokers = '{0}:{1}'.format(self._server, self._port)

        # create topic with partitions
        self._create_topic()

        self._kafka_conf = self._producer_config(self._kafka_brokers)

        self._p = Producer(**self._kafka_conf) 
Example 10
Project: incubator-spot   Author: apache   File: serializer.py   License: Apache License 2.0
def deserialize(rawbytes):
    '''
        Deserialize given bytes according to the supported Avro schema.

    :param rawbytes: A buffered I/O implementation using an in-memory bytes buffer.
    :returns       : List of ``str`` objects, extracted from the binary stream.
    :rtype         : ``list``
    '''
    decoder = avro.io.BinaryDecoder(io.BytesIO(rawbytes))
    reader  = avro.io.DatumReader(avro.schema.parse(AVSC))

    try: return reader.read(decoder)[list.__name__]
    except Exception as exc:
        logging.getLogger('SPOT.INGEST.COMMON.SERIALIZER')\
            .error('[{0}] {1}'.format(exc.__class__.__name__, exc.message))

    return [] 
Example 11
Project: incubator-spot   Author: apache   File: serializer.py   License: Apache License 2.0
def serialize(value):
    '''
        Convert a ``list`` object to an avro-encoded format.

    :param value: List of ``str`` objects.
    :returns    : A buffered I/O implementation using an in-memory bytes buffer.
    :rtype      : ``str``
    '''
    writer   = avro.io.DatumWriter(avro.schema.parse(AVSC))
    rawbytes = io.BytesIO()

    try:
        writer.write({ list.__name__: value }, avro.io.BinaryEncoder(rawbytes))
        return rawbytes
    except avro.io.AvroTypeException:
        logging.getLogger('SPOT.INGEST.COMMON.SERIALIZER')\
            .error('The type of ``{0}`` is not supported by the Avro schema.'
            .format(type(value).__name__))

    return None 
Example 12
Project: incubator-spot   Author: apache   File: hdfs_client.py   License: Apache License 2.0
def __init__(self, url, mutual_auth, cert=None, verify='true', **kwargs):

        self._logger = logging.getLogger("SPOT.INGEST.HDFS_client")
        session = Session()

        if verify == 'true':
            self._logger.info('SSL verification enabled')
            session.verify = True
            if cert is not None:
                self._logger.info('SSL Cert: ' + cert)
                if ',' in cert:
                    session.cert = [path.strip() for path in cert.split(',')]
                else:
                    session.cert = cert
        elif verify == 'false':
            session.verify = False

        super(SecureKerberosClient, self).__init__(url, mutual_auth, session=session, **kwargs) 
Example 13
Project: incubator-spot   Author: apache   File: hdfs_client.py   License: Apache License 2.0
def get_client(user=None):
    # type: (object) -> Client

    logger = logging.getLogger('SPOT.INGEST.HDFS.get_client')
    hdfs_nm, hdfs_port, hdfs_user = Config.hdfs()
    conf = {'url': '{0}:{1}'.format(hdfs_nm, hdfs_port)}

    if Config.ssl_enabled():
        ssl_verify, ca_location, cert, key = Config.ssl()
        conf.update({'verify': ssl_verify.lower()})
        if cert:
            conf.update({'cert': cert})

    if Config.kerberos_enabled():
        krb_conf = {'mutual_auth': 'OPTIONAL'}
        conf.update(krb_conf)

    # TODO: possible user parameter
    logger.info('Client conf:')
    for k,v in conf.iteritems():
        logger.info(k + ': ' + v)

    client = SecureKerberosClient(**conf)

    return client 
Example 14
Project: incubator-spot   Author: apache   File: iana_transform.py   License: Apache License 2.0
def __init__(self,config,logger=None):

        self._logger = logging.getLogger('OA.IANA')  if logger else Util.get_logger('OA.IANA',create_file=False)        
        if COL_CLASS in config:
            self._qclass_file_path = config[COL_CLASS]
        if COL_QTYPE in config:
            self._qtype_file_path = config[COL_QTYPE]
        if COL_RCODE in config:
            self._rcode_file_path = config[COL_RCODE]
        if COL_PRESP in config:
            self._http_rcode_file_path = config[COL_PRESP]

        self._qclass_dict = {}
        self._qtype_dict = {}
        self._rcode_dict = {} 
        self._http_rcode_dict = {}
        self._init_dicts() 
Example 15
Project: wafw00f   Author: EnableSecurity   File: evillib.py   License: BSD 3-Clause "New" or "Revised" License
def urlParser(target):
    log = logging.getLogger('urlparser')

    ssl = False
    o = urlparse(target)
    if o[0] not in ['http', 'https', '']:
        log.error('scheme %s not supported' % o[0])
        return
    if o[0] == 'https':
        ssl = True
    if len(o[2]) > 0:
        path = o[2]
    else:
        path = '/'
    tmp = o[1].split(':')
    if len(tmp) > 1:
        port = tmp[1]
    else:
        port = None
    hostname = tmp[0]
    query = o[4]
    return (hostname, port, path, query, ssl) 
Example 16
Project: backtrader-cn   Author: pandalibin   File: sina.py   License: GNU General Public License v3.0
def enable_debug_requests():
    # Enable debugging at the http.client level (requests -> urllib3 -> http.client).
    # You will see the REQUEST, including HEADERS and DATA, and the RESPONSE with HEADERS but without DATA.
    # The only thing missing is the response body, which is not logged.
    from http.client import HTTPConnection
    import logging

    HTTPConnection.debuglevel = 1
    logger.setLevel(logging.DEBUG)
    requests_log = logging.getLogger("requests.packages.urllib3")
    requests_log.setLevel(logging.DEBUG)
    requests_log.propagate = True


# Uncomment to enable debug mode
# enable_debug_requests() 
Example 17
Project: drydock   Author: airshipit   File: promenade_client.py   License: Apache License 2.0
def __init__(self, scheme='http', marker=None, timeout=None):
        self.logger = logging.getLogger(__name__)
        self.__session = requests.Session()

        self.set_auth()

        self.marker = marker
        self.__session.headers.update({'X-Context-Marker': marker})

        self.prom_url = self._get_prom_url()
        self.port = self.prom_url.port
        self.host = self.prom_url.hostname
        self.scheme = scheme

        if self.port:
            self.base_url = "%s://%s:%s/api/" % (self.scheme, self.host,
                                                 self.port)
        else:
            # assume default port for scheme
            self.base_url = "%s://%s/api/" % (self.scheme, self.host)

        self.default_timeout = self._calc_timeout_tuple((20, 30), timeout) 
Example 18
Project: BASS   Author: Cisco-Talos   File: cmdline.py   License: GNU General Public License v2.0
def parse_args():
    parser = argparse.ArgumentParser(description = "Bass")
    parser.add_argument("-v", "--verbose", action = "count", default = 0, help = "Increase verbosity")
    parser.add_argument("samples", metavar = "sample", nargs = "+", help = "Sample path") 

    args = parser.parse_args()

    try:
        loglevel = {
            0: logging.ERROR,
            1: logging.WARN,
            2: logging.INFO
        }[args.verbose]
    except KeyError:
        loglevel = logging.DEBUG

    logging.basicConfig(level = loglevel)
    logging.getLogger().setLevel(loglevel)

    return args 
Example 19
Project: BASS   Author: Cisco-Talos   File: whitelist.py   License: GNU General Public License v2.0
def parse_args():
    parser = argparse.ArgumentParser(description = "Add samples to BASS whitelist")
    parser.add_argument("-v", "--verbose", action = "count", default = 0, help = "Increase verbosity")
    parser.add_argument("--url", type = str, default = "http://localhost:5000", help = "URL of BASS server")
    parser.add_argument("sample", help = "Whitelist sample")

    args = parser.parse_args()

    try:
        loglevel = {
            0: logging.ERROR,
            1: logging.WARN,
            2: logging.INFO}[args.verbose]
    except KeyError:
        loglevel = logging.DEBUG
    logging.basicConfig(level = loglevel)
    logging.getLogger().setLevel(loglevel)

    return args 
Example 20
Project: BASS   Author: Cisco-Talos   File: client.py   License: GNU General Public License v2.0
def parse_args():
    parser = argparse.ArgumentParser(description = "Find common ngrams in binary files")
    parser.add_argument("-v", "--verbose", action = "count", default = 0, help = "Increase verbosity")
    parser.add_argument("--output", type = str, default = None, help = "Output to file instead of stdout")
    parser.add_argument("--url", type = str, default = "http://localhost:5000", help = "URL of BASS server")
    parser.add_argument("samples", metavar = "sample", nargs = "+", help = "Cluster samples")

    args = parser.parse_args()

    try:
        loglevel = {
            0: logging.ERROR,
            1: logging.WARN,
            2: logging.INFO}[args.verbose]
    except KeyError:
        loglevel = logging.DEBUG
    logging.basicConfig(level = loglevel)
    logging.getLogger().setLevel(loglevel)

    return args 
Example 21
Project: everyclass-server   Author: everyclass   File: __init__.py   License: Mozilla Public License 2.0
def init_plugins():
        """初始化日志、错误追踪、打点插件"""
        from everyclass.rpc import init as init_rpc
        from everyclass.common.flask import print_config

        # Sentry
        if plugin_available("sentry"):
            sentry.init_app(app=__app)
            sentry_handler = SentryHandler(sentry.client)
            sentry_handler.setLevel(logging.WARNING)
            logging.getLogger().addHandler(sentry_handler)

            init_rpc(sentry=sentry)
            logger.info('Sentry is inited because you are in {} mode.'.format(__app.config['CONFIG_NAME']))

        # metrics
        global statsd
        statsd = DogStatsd(namespace=f"{__app.config['SERVICE_NAME']}.{os.environ.get('MODE').lower()}",
                           use_default_route=True)

        init_rpc(logger=logger)

        print_config(__app, logger) 
Example 22
Project: cherrypy   Author: cherrypy   File: _cplogging.py   License: BSD 3-Clause "New" or "Revised" License
def __init__(self, appid=None, logger_root='cherrypy'):
        self.logger_root = logger_root
        self.appid = appid
        if appid is None:
            self.error_log = logging.getLogger('%s.error' % logger_root)
            self.access_log = logging.getLogger('%s.access' % logger_root)
        else:
            self.error_log = logging.getLogger(
                '%s.error.%s' % (logger_root, appid))
            self.access_log = logging.getLogger(
                '%s.access.%s' % (logger_root, appid))
        self.error_log.setLevel(logging.INFO)
        self.access_log.setLevel(logging.INFO)

        # Silence the no-handlers "warning" (stderr write!) in stdlib logging
        self.error_log.addHandler(NullHandler())
        self.access_log.addHandler(NullHandler())

        cherrypy.engine.subscribe('graceful', self.reopen_files) 
Example 23
Project: incubator-spot   Author: apache   File: collector.py   License: Apache License 2.0
def _initialize_members(self, hdfs_app_path, kafkaproducer, conf_type):

        # getting parameters.
        self._logger = logging.getLogger('SPOT.INGEST.FLOW')
        self._hdfs_app_path = hdfs_app_path
        self._producer = kafkaproducer

        # get script path
        self._script_path = os.path.dirname(os.path.abspath(__file__))

        # read flow configuration.
        conf_file = "{0}/ingest_conf.json".format(os.path.dirname(os.path.dirname(self._script_path)))
        conf = json.loads(open(conf_file).read())
        self._conf = conf["pipelines"][conf_type]

        # set configuration.
        self._collector_path = self._conf['collector_path']        
        self._dsource = 'flow'
        self._hdfs_root_path = "{0}/{1}".format(hdfs_app_path, self._dsource)

        self._supported_files = self._conf['supported_files']

        # create collector watcher
        self._watcher = FileWatcher(self._collector_path,self._supported_files)
        
        # Multiprocessing. 
        self._processes = conf["collector_processes"]
        self._ingestion_interval = conf["ingestion_interval"]
        self._pool = Pool(processes=self._processes)
        # TODO: review re-use of hdfs.client
        self._hdfs_client = hdfs.get_client() 
Example 24
Project: incubator-spot   Author: apache   File: collector.py   License: Apache License 2.0
def _initialize_members(self,hdfs_app_path,kafka_topic,conf_type):
        
        # getting parameters.
        self._logger = logging.getLogger('SPOT.INGEST.PROXY')
        self._hdfs_app_path = hdfs_app_path
        self._kafka_topic= kafka_topic

        # get script path
        self._script_path = os.path.dirname(os.path.abspath(__file__))

        # read proxy configuration.
        conf_file = "{0}/ingest_conf.json".format(os.path.dirname(os.path.dirname(self._script_path)))
        conf = json.loads(open(conf_file).read())
        self._message_size = conf["kafka"]["message_size"]
        self._conf = conf["pipelines"][conf_type]

        # get collector path.
        self._collector_path = self._conf['collector_path']

        #get supported files
        self._supported_files = self._conf['supported_files']

        # create collector watcher
        self._watcher = FileWatcher(self._collector_path,self._supported_files)

        # Multiprocessing. 
        self._processes = conf["collector_processes"]
        self._ingestion_interval = conf["ingestion_interval"]
        self._pool = Pool(processes=self._processes) 
Example 25
Project: incubator-spot   Author: apache   File: collector.py   License: Apache License 2.0
def _initialize_members(self, hdfs_app_path, kafkaproducer, conf_type):

        # getting parameters.
        self._logger = logging.getLogger('SPOT.INGEST.DNS')
        self._hdfs_app_path = hdfs_app_path
        self._producer = kafkaproducer

        # get script path
        self._script_path = os.path.dirname(os.path.abspath(__file__))

        # read dns configuration.
        conf_file = "{0}/ingest_conf.json".format(os.path.dirname(os.path.dirname(self._script_path)))
        conf = json.loads(open(conf_file).read())
        self._conf = conf["pipelines"][conf_type]

        # set configuration.
        self._collector_path = self._conf['collector_path']
        self._dsource = 'dns'
        self._hdfs_root_path = "{0}/{1}".format(hdfs_app_path, self._dsource)

        # set configuration.
        self._pkt_num = self._conf['pkt_num']
        self._pcap_split_staging = self._conf['pcap_split_staging']
        self._supported_files = self._conf['supported_files']

        # create collector watcher
        self._watcher = FileWatcher(self._collector_path, self._supported_files)

        # Multiprocessing.
        self._processes = conf["collector_processes"]
        self._ingestion_interval = conf["ingestion_interval"]
        self._pool = Pool(processes=self._processes)
        # TODO: review re-use of hdfs.client
        self._hdfs_client = hdfs.get_client() 
Example 26
Project: incubator-spot   Author: apache   File: file_watcher.py   License: Apache License 2.0
def __init__(self, path, supported_files, recursive):
        self._logger  = logging.getLogger('SPOT.INGEST.COMMON.FILE_WATCHER')
        self._queue   = []

        super(FileWatcher, self).__init__()

        self._logger.info('Schedule watching "{0}" directory.'.format(path))
        super(FileWatcher, self).schedule(NewFileEventHandler(self), path, recursive)

        self._regexs  = [re.compile(x) for x in supported_files]
        pattern_names = ', '.join(['"%s"' % x for x in supported_files])
        self._logger.info('Supported filenames: {0}'.format(pattern_names))

        self._logger.info('The search in sub-directories is {0}.'
            .format('enabled' if recursive else 'disabled')) 
Example 27
Project: incubator-spot   Author: apache   File: utils.py   License: Apache License 2.0
def get_logger(cls, name, level='INFO', filepath=None, fmt=None):
        '''
            Return a new logger or an existing one, if any.

        :param cls     : The class as implicit first argument.
        :param name    : Return a logger with the specified name.
        :param filepath: Path of the file, where logs will be saved. If it is not set,
                         redirects the log stream to `sys.stdout`.
        :param fmt     : A format string for the message as a whole, as well as a format
                         string for the date/time portion of the message.
                         Default: '%(asctime)s %(levelname)-8s %(name)-34s %(message)s'
        :rtype         : :class:`logging.Logger`
        '''
        logger = logging.getLogger(name)
        # .............................if logger already exists, return it
        if logger.handlers: return logger

        if filepath:
            # .........................rotate log file (1 rotation per 512KB)
            handler  = RotatingFileHandler(filepath, maxBytes=524288, backupCount=8)
        else:
            handler  = logging.StreamHandler(sys.stdout)

        fmt = fmt or '%(asctime)s %(levelname)-8s %(name)-34s %(message)s'
        handler.setFormatter(logging.Formatter(fmt))

        try:
            logger.setLevel(getattr(logging, level.upper() if level else 'INFO'))
        except: logger.setLevel(logging.INFO)

        logger.addHandler(handler)
        return logger 
Example 28
Project: incubator-spot   Author: apache   File: kafka_client.py   License: Apache License 2.0
def _initialize_members(self, topic, server, port, zk_server, zk_port, partition):

        self._logger = logging.getLogger("SPOT.INGEST.KafkaConsumer")

        self._topic = topic
        self._server = server
        self._port = port
        self._zk_server = zk_server
        self._zk_port = zk_port
        self._id = partition
        self._kafka_brokers = '{0}:{1}'.format(self._server, self._port)
        self._kafka_conf = self._consumer_config(self._id, self._kafka_brokers) 
Example 29
Project: incubator-spot   Author: apache   File: listener.py   License: Apache License 2.0
def streaming_listener(**kwargs):
    '''
        Initialize the Spark job.
    '''
    Util.get_logger('SPOT.INGEST', kwargs.pop('log_level'))

    logger  = logging.getLogger('SPOT.INGEST.COMMON.LISTENER')
    logger.info('Initializing Spark Streaming Listener...')

    dbtable = '{0}.{1}'.format(kwargs.pop('database'), kwargs['type'])
    topic   = kwargs.pop('topic')

    sc      = SparkContext(appName=kwargs['app_name'] or topic)
    logger.info('Connect to Spark Cluster as job "{0}" and broadcast variables on it.'
        .format(kwargs.pop('app_name') or topic))
    ssc     = StreamingContext(sc, batchDuration=kwargs['batch_duration'])
    logger.info('Streaming data will be divided into batches of {0} seconds.'
        .format(kwargs.pop('batch_duration')))
    hsc     = HiveContext(sc)
    logger.info('Read Hive\'s configuration to integrate with data stored in it.')

    import pipelines
    module  = getattr(pipelines, kwargs.pop('type'))
    stream  = module.StreamPipeline(ssc, kwargs.pop('zkquorum'),
                kwargs.pop('group_id') or topic, { topic: int(kwargs.pop('partitions')) })

    schema  = stream.schema
    segtype = stream.segtype

    stream.dstream\
        .map(lambda x: module.StreamPipeline.parse(x))\
        .filter(lambda x: bool(x))\
        .foreachRDD(lambda x: store(x, hsc, dbtable, topic, schema, segtype))

    ssc.start()
    logger.info('Start the execution of the streams.')
    ssc.awaitTermination() 
Example 30
Project: incubator-spot   Author: apache   File: listener.py   License: Apache License 2.0
def store(rdd, hsc, dbtable, topic, schema=None, segtype='segments'):
    '''
        Interface for saving the content of the streaming :class:`DataFrame` out into
    Hive storage.

    :param rdd    : The content as a :class:`pyspark.RDD` of :class:`Row`.
    :param hsc    : A variant of Spark SQL that integrates with data stored in Hive.
    :param dbtable: The specified table in Hive database.
    :param topic  : Name of the topic to listen for incoming segments.
    :param schema : The schema of this :class:`DataFrame` as a
                    :class:`pyspark.sql.types.StructType`.
    :param segtype: The type of the received segments.
    '''
    logger = logging.getLogger('SPOT.INGEST.COMMON.LISTENER')

    if rdd.isEmpty():
        logger.info(' ---- LISTENING KAFKA TOPIC: {0} ---- '.format(topic))
        return

    hsc.setConf('hive.exec.dynamic.partition', 'true')
    hsc.setConf('hive.exec.dynamic.partition.mode', 'nonstrict')

    logger.info('Received {0} from topic. [Rows: {1}]'.format(segtype, rdd.count()))
    logger.info('Create distributed collection for partition "{0}".'
        .format(rdd.first()[0].replace('-', '').replace(' ', '')[:10]))

    df = hsc.createDataFrame(rdd, schema)
    df.write.format('parquet').mode('append').insertInto(dbtable)
    logger.info(' *** REGISTRATION COMPLETED *** ')