python source code of stream

ztag-master
- ANNOTATION-COOKBOOK.md
- run_tests
- LICENSE
- dev-requirements.txt
- ztag
  - log.py
  - decoders
    - decoders.py
    - __init__.py
  - transformer.py
  - classargs.py
  - annotation.py
  - template_http.py
  - encoders
    - protobuf.py
    - __init__.py
    - encoders.py
  - transform.py
  - schema.py
  - test
    - protocols_test.py
    - updater_test.py
    - certkey_public_key_workaround_test.py
    - __init__.py
    - encoding_test.py
  - __main__.py
  - protocols.py
  - devices
    - FtpLinksys_1.json
    - FtpCerebusFtpd_1.json
    - FtpBftpd_2.json
    - device_with_telnet.json
    - huawei_38e5.json
    - FtpIis_1.json
    - netgear_prosafe.json
    - tomcat.json
    - FtpRouterBoard_1.json
    - FtpIpTime_2.json
    - FtpDreambox_2.json
    - device_with_smb.json
    - FtpDlink_1.json
    - flexim.json
    - FtpTitanFtpd_1.json
    - FtpVxWorks_1.json
    - FtpDdwrt_1.json
    - device_with_dnp3.json
    - FtpEcosense_1.json
    - FtpHp_3.json
    - FtpTenor_1.json
    - FtpZyXel_1.json
    - FtpVsFtpd_3.json
    - FtpVxWorks_2.json
    - dell_b2360dn_laser_printer.json
    - FtpOverlandStorage_1.json
    - FtpAsus_2.json
    - FtpDreambox_1.json
    - FtpTypSoft_1.json
    - FtpProFtpd_2.json
    - FtpRicoh_2.json
    - FtpLutron_1.json
    - device_with_upnp.json
    - FtpTelindusFtpd_1.json
    - hp_laserjet_p3005.json
    - agranat_emweb.json
    - dell_idrac.json
    - hp_laserjet_4250.json
    - FtpPureFtpd_2.json
    - dell_chassis_management_controller.json
    - device_with_mysql.json
    - FtpLexmark_1.json
    - device_with_smtp.json
    - FtpSeagate_1.json
    - computec_cws.json
    - actl.json
    - device_with_ipp.json
    - FtpTnFtpd_1.json
    - FtpWesternDigital_1.json
    - creston_cp2e.json
    - FtpLinksys_2.json
    - FtpAlcatel_1.json
    - FtpPureFtpd_1.json
    - wurm_multigate.json
    - cisco_https.json
    - cisco_server.json
    - nginx.json
    - axis_2120_network_camera.json
    - hp_laserjet_1320.json
    - FtpBelkin_1.json
    - hp_laserjet_p4014.json
    - hp_officejet_pro_l7500.json
    - FtpGene6Ftpd_1.json
    - qnx_npm6.json
    - device_with_oracle.json
    - crouzet.json
    - FtpAllworks_1.json
    - cisco_ios_server.json
    - FtpLatronix_1.json
    - vykon.json
    - FtpNetGear_1.json
    - FtpFritz_2.json
    - FtpDreambox_3.json
    - wattstopper.json
    - FtpApc_1.json
    - device_with_fox.json
    - lantronix_xport.json
    - FtpSoftAtHome_1.json
    - FtpOpto22_1.json
    - hp_laserjet_4700.json
    - FtpAxis_1.json
    - device_with_mssql.json
    - FtpGenericDsl_2.json
    - FtpZxfs_2.json
    - known_key.json
    - ubuntu_OpenSSH_5.9p1_debian_ubuntu.json
    - FtpZxfs_1.json
    - entes_rg_xxcs.json
    - device_with_imap.json
    - FtpVodaphone_1.json
    - FtpAsus_1.json
    - FtpXerox_2.json
    - crouzet_xn05.json
    - qnx_jace.json
    - FtpXerox_1.json
    - device_with_postgres.json
    - FtpSony_2.json
    - FtpDell_2.json
    - device_with_ssh.json
    - FtpFritz_1.json
    - FtpWFtpd_1.json
    - netgear_smart_switch.json
    - FtpNcFtpd_1.json
    - panasonic_fpwebd.json
    - FtpSharp_1.json
    - FtpAsus_3.json
    - apc_application_error.json
    - FtpFullrate_1.json
    - FtpBulletproofFtpd_1.json
    - schneider_nf3000.json
    - dell_2130cn_color_laser.json
    - dell_color_laser_3110cn.json
    - FtpWsFtpd_1.json
    - FtpMikroTikFtpd_1.json
    - FtpIntercon_1.json
    - american_megatrends.json
    - FtpTrimbleFtpd_1.json
    - FtpRicoh_1.json
    - device_with_cwmp.json
    - coyote.json
    - device_with_http.json
    - FtpQnapTurboNas_3.json
    - FtpGenericCamera_2.json
    - FtpKonicaMinolta_1.json
    - stripstarttls.json
    - rockwell_141.json
    - konica_minolta_bizhub_c454e.json
    - brother_mfc_8950dtw.json
    - hpe_officeconnect_switch_1820.json
    - FtpLeightronix_1.json
    - canon_lbp6230dn.json
    - thinksimple.json
    - FtpBftpd_1.json
    - FtpWarFtpd_1.json
    - FtpSeagate_2.json
    - ezproxy_server.json
    - FtpZte_1.json
    - FtpUClinux_1.json
    - thecus_n7700.json
    - FtpGenericCamera_1.json
    - FtpIbm_1.json
    - FtpProFtpd_1.json
    - FtpLacie_1.json
    - abb_stotz_kontakt.json
    - FtpXlight_1.json
    - ubuntu_OpenSSH_6.6p1_ubuntu_ubuntu.json
    - FtpServuFtpd_2.json
    - device_with_pop3.json
    - device_with_bacnet.json
    - FtpWindRiver_1.json
    - FtpLacie_2.json
    - ubuntu_OpenSSH_6.6p1_ubuntu.json
    - FtpDrayTek_1.json
    - FtpIqeye_1.json
    - entes_mpr_63.json
    - device_with_imaps.json
    - FtpDell_1.json
    - FtpGenericUpdate_1.json
    - FtpAxis_2.json
    - FtpCesarFtpd_1.json
    - synchronic.json
    - digi_one.json
    - apple_server.json
    - FtpServuFtpd_1.json
    - FtpQnapTurboNas_2.json
    - FtpSynology_2.json
    - FtpSpeedPort_1.json
    - hp_networks.json
    - raspbian.json
    - device_with_mongodb.json
    - FtpWesternDigital_2.json
    - hp_laserjet_400.json
    - synology_https.json
    - FtpLacie_3.json
    - bb_network_camera.json
    - FtpVsFtpd_1.json
    - FtpWarFtpd_2.json
    - FtpDlink_2.json
    - FtpGenericDsl_1.json
    - FtpIpTime_1.json
    - device_with_https_get.json
    - FtpFilezilla_1.json
    - apache_http.json
    - iis7.json
    - FtpAlcatel_2.json
    - FtpNationalInstruments_1.json
    - FtpNetApp_1.json
    - microsoft_exchange_2007.json
    - avtech_room_alert_32ew.json
    - brother_hl_6180dw.json
    - se_electronic_g_02_90_00.json
    - hp_color_laserjet_mfp_m476dw.json
    - FtpSynology_1.json
    - device_with_pop3s.json
    - FtpHp_1.json
    - hp_printer_server.json
    - FtpHp_2.json
    - FtpNucleus_1.json
    - nginx_simple.json
    - honeywell_notifier.json
    - FtpSony_1.json
    - FtpQnapTurboNas_1.json
    - FtpVsFtpd_2.json
    - device_with_https.json
    - device_with_dns.json
    - FtpMaygion_1.json
    - device_with_ftp.json
    - device_with_s7.json
    - computec.json
  - annotations
    - viavideoweb.py
    - integ_jnior_310.py
    - FtpLinksys.py
    - FtpWindRiver.py
    - FtpSynology.py
    - openssh_ubuntu.py
    - brother.py
    - litespeed.py
    - FtpBulletproofFtpd.py
    - lacie.py
    - FtpVxWorks.py
    - FtpXlight.py
    - FtpWsFtpd.py
    - FtpCesarFtpd.py
    - FtpServuFtpd.py
    - synology.py
    - FtpFritz.py
    - westerndigital.py
    - FtpQnapTurboNas.py
    - FtpSharp.py
    - FtpLexmark.py
    - apple.py
    - FtpWesternDigital.py
    - http.py
    - ciscohttps.py
    - solar_log_mei.py
    - FtpZxfs.py
    - flexim.py
    - FtpIbm.py
    - jdhttps.py
    - lifesize.py
    - FtpFullrate.py
    - lexmarkhttps.py
    - supermicro.py
    - abb_stotz_kontakt.py
    - rockwell_mei.py
    - virata.py
    - FtpTrimbleFtpd.py
    - FtpOpto22.py
    - FtpXerox.py
    - gordian.py
    - FtpCerebusFtpd.py
    - FtpRicoh.py
    - FtpVodaphone.py
    - FtpKebi.py
    - epson.py
    - microsoftwince.py
    - FtpSpeedPort.py
    - aruba.py
    - netgear.py
    - FtpGenericCamera.py
    - ocm_pro_cf.py
    - redhat.py
    - FtpIis.py
    - windows_mei.py
    - goaheadwebs.py
    - panasonic_web_server.py
    - FtpZte.py
    - honeywell.py
    - FtpLeightronix.py
    - hikvision.py
    - FtpFilezilla.py
    - FtpTitanFtpd.py
    - konicaminoltahttps.py
    - fox.py
    - raritan.py
    - integ_mei.py
    - common_smtp.py
    - ssh_generic_os.py
    - heartbleed.py
    - boa.py
    - iii100.py
    - FtpOverlandStorage.py
    - export.py
    - computec.py
    - tivo.py
    - bomgar.py
    - FtpNetApp.py
    - apc.py
    - sharphttps.py
    - ezproxy.py
    - helix.py
    - FtpLacie.py
    - entes.py
    - fritz.py
    - FtpSoftAtHome.py
    - avtech_mei.py
    - allegro.py
    - idrac.py
    - lenovo.py
    - common_pop3.py
    - mbedthis.py
    - FtpEcosense.py
    - panologic.py
    - FtpVsFtpd.py
    - bacnet.py
    - wurm.py
    - lantronix.py
    - telemecanique_mei.py
    - FtpDell.py
    - roomwizard.py
    - FtpIntercon.py
    - room_alert.py
    - dell.py
    - crouzet.py
    - FtpMaygion.py
    - FtpBftpd.py
    - mikrotik.py
    - canon.py
    - FtpDrayTek.py
    - scannex.py
    - fox_brand.py
    - tplink_tl-wr1043nd.py
    - mini_httpd.py
    - cherokee.py
    - FtpRouterBoard.py
    - common_cert_fps.py
    - FtpDlink.py
    - agrantemweb.py
    - FtpNationalInstruments.py
    - microsoft_exchange.py
    - FtpAxis.py
    - hpe_officeconnect.py
    - sensatronics.py
    - seagate.py
    - protocols.py
    - kmhttpd.py
    - siemens_mei.py
    - FtpSony.py
    - FtpNucleus.py
    - FtpSeagate.py
    - mrvl.py
    - silicondust.py
    - ixsystem.py
    - varnish.py
    - veris_e51c3.py
    - __init__.py
    - FtpIqeye.py
    - cougar.py
    - fox_qns.py
    - FtpGene6Ftpd.py
    - hpirg.py
    - FtpZyXel.py
    - microsoftiis.py
    - comtrol.py
    - siemens_simatic.py
    - polycom.py
    - FtpAlcatel.py
    - FtpWarFtpd.py
    - FtpHp.py
    - common_imap.py
    - stripstarttls.py
    - pokeys_56e.py
    - rhel_apache.py
    - digione.py
    - protocols_zgrab2.py
    - polabs_mei.py
    - avtech.py
    - lighttpd.py
    - americanmegatrends.py
    - FtpTelindusFtpd.py
    - FtpTnFtpd.py
    - weg_mei.py
    - FtpTypesoftFtpd.py
    - FtpDreambox.py
    - veris_industries_mei.py
    - thecus.py
    - sunmicrosystems.py
    - cisco_ios.py
    - lab_el_mei.py
    - dlink_voip.py
    - FtpKonicaMinolta.py
    - axis_http.py
    - debut.py
    - FtpMikroTikFtpd.py
    - schneider.py
    - FtpBelkin.py
    - actl.py
    - FtpPureFtpd.py
    - FtpNcFtpd.py
    - se_electronic.py
    - FtpAsus.py
    - microsofthttpapi.py
    - FtpLutron.py
    - synchronic.py
    - FtpDdwrt.py
    - tlskeyknown.py
    - emc.py
    - FtpLatronix.py
    - FtpWFtpd.py
    - osnexus.py
    - FtpGenericDsl.py
    - FtpApc.py
    - printers
      - xeroxhttps.py
      - ipp.py
      - dell.py
      - __init__.py
      - hp.py
    - FtpUClinux.py
    - FtpProFtpd.py
    - FtpGenericUpdate.py
    - bigip.py
    - cisco.py
    - FtpTenor.py
    - FtpAllworks.py
    - hp_networks.py
    - FtpIpTime.py
    - FtpNetgear.py
  - __init__.py
  - errors.py
  - device.py
  - stream.py
  - transforms
    - sslv2.py
    - upnp.py
    - mongodb.py
    - postgres.py
    - ntp.py
    - smtp.py
    - ftp.py
    - http.py
    - dns.py
    - ssh.py
    - ipp.py
    - mysql.py
    - test.json
    - fox.py
    - pop3.py
    - bacnet.py
    - https.py
    - imap.py
    - cwmp.py
    - dnp3.py
    - mssql.py
    - __init__.py
    - oracle.py
    - modbus.py
    - smb.py
    - telnet.py
    - s7.py
  - annotator.py
- setup.py
- .travis.yml
- README.md
- .gitignore

import csv
import os
import sys
import time
import collections
import threading
import logging

from ztag.errors import IgnoreObject


class Updater(object):
    """
    Updater encapsulates the behavior for the updates.csv file; put_update() is called with each
    update, but output is only written every :frequency: seconds.
    """
    def __init__(self, output=None, frequency=1.0, logger=None):
        self.output = output
        self.frequency = frequency
        self.logger = logger
        self.prev = None
        self._wrote_labels = False

    def put_update(self, row):
        if not self.output:
            return

        if self.prev and (row.time - self.prev.time) < self.frequency:
            return

        self.prev = row

        if not self._wrote_labels:
            self.output.write(row.get_csv_labels() + "\n")
            self._wrote_labels = True

        self.output.write(row.get_csv() + "\n")
        self.output.flush()

    def close(self):
        if self.output and self.output != sys.stderr:
            try:
                self.output.close()
            except BaseException as e:
                if self.logger:
                    self.logger.warn("Failed to close updates CSV stream: %s", str(e))


class UpdateRow(object):
    """
    UpdateRow encapsulates the information for a single update and the logic for outputting it as
    a CSV row.
    """
    ORDER = ("skipped", "handled", "delta_skipped", "delta_handled")

    def __init__(self, skipped, handled, updated_at=None, prev=None):
        """
        Construct a new row with the given number of skipped / handled entries, and calculate the
        deltas from prev (or set them to 0). Also sets time to now.
        :param skipped: current total number of skipped records
        :param handled: current total number of handled records
        :param prev: the previous UpdateRow
        """
        self.time = updated_at or time.time()
        self.skipped = skipped
        self.handled = handled
        if prev:
            self.delta_skipped = skipped - prev.skipped
            self.delta_handled = handled - prev.handled
        else:
            self.delta_skipped = 0
            self.delta_handled = 0

    @classmethod
    def get_csv_labels(cls):
        return ",".join(cls.ORDER)

    def get_csv(self):
        return ",".join(str(getattr(self, label)) for label in self.ORDER)


class Stream(object):

    def __init__(self, incoming, outgoing, transforms=None, logger=None, updates=None):
        super(Stream, self).__init__()
        self.incoming = incoming
        self.outgoing = outgoing
        self.transforms = transforms or list()
        self.logger = logger
        if updates:
            self.updater = Updater(output=updates, frequency=1.0, logger=logger)
        else:
            self.updater = None

    def put_update(self, skipped, handled):
        if not self.updater:
            return
        this_update = UpdateRow(skipped=skipped, handled=handled, prev=self.updater.prev)
        self.updater.put_update(this_update)

    def run(self):
        skipped = 0
        handled = 0
        for obj in self.incoming:
            self.put_update(handled=handled, skipped=skipped)
            try:
                out = obj
                for transformer in self.transforms:
                    out = transformer.transform(out)
                    if out is None:
                        raise IgnoreObject()
                self.outgoing.take(out)
                handled += 1
            except IgnoreObject as e:
                if self.logger:
                    self.logger.debug(e.original_exception)
                    self.logger.trace(obj)
                    if e.trback:
                        self.logger.warn(e.trback)
                skipped += 1
                continue
        self.outgoing.cleanup()
        if self.updater:
            self.updater.close()
        return (handled, skipped)


class Incoming(object):
    pass


class InputFile(Incoming):

    def __init__(self, input_file=sys.stdin):
        self.input_file = input_file

    def __iter__(self):
        for line in self.input_file:
            yield line


class InputCSV(Incoming):

    def __init__(self, input_file=sys.stdin):
        self.input_file = input_file
        self.csvdict = csv.DictReader(self.input_file)

    def __iter__(self):
        for record in self.csvdict:
            yield record


class Outgoing(object):

    def __init__(self, *args, **kwargs):
        pass

    def take(self, obj):
        raise NotImplementedError

    def cleanup(self):
        pass


class PythonPrint(Outgoing):

    def __init__(self, *args, **kwargs):
        super(PythonPrint, self).__init__()

    def take(self, obj):
        print obj


class OutputFile(Outgoing):

    def __init__(self, output_file=sys.stdout, *args, **kwargs):
        super(OutputFile, self).__init__()
        self.output_file = output_file

    def take(self, obj):
        self.output_file.write(obj)
        self.output_file.write("\n")


class RedisQueue(Outgoing):

    CERTIFICATES_QUEUE = "certificate"
    PUBKEY_QUEUE = "pubkey"
    # we might as well try to do a whole bunch. The _worst_ case scenario
    # by setting a limit too high is that the server runs out of memory
    # and kills python and the task fails. Which would have happened
    # anyway, because we couldn't connect to redis.
    MAX_RETRIES = 60
    BATCH_SIZE = 250

    def __init__(self, logger=None, destination=None, *args, **kwargs):
        import redis
        super(RedisQueue, self).__init__(*args, **kwargs)
        host = os.environ.get('ZTAG_REDIS_HOST', 'localhost')
        port = int(os.environ.get('ZTAG_REDIS_PORT', 6379))
        if destination == "full_ipv4":
            queue = "ipv4"
        elif destination == "alexa_top1mil":
            queue = "domain"
        else:
            raise Exception("invalid destination: %s" % destination)
        self.logger = logger
        self.queue = queue
        try:
            self.redis = redis.Redis(host=host, port=port, db=0,
                                     socket_connect_timeout=10)
        except redis.ConnectionError as e:
            msg = "could not connect to redis: %s" % str(e)
            self.logger.fatal(msg)
        # batching
        self.queued = 0
        self.retries = 0
        self.records = []
        self.certificates = []

    def push(self, noretry=False):
        import redis
        if self.queued == 0:
            return
        try:
            p = self.redis.pipeline()
            for r in self.records:
                p.rpush(self.queue, r)
            for r in self.certificates:
                p.rpush(self.CERTIFICATES_QUEUE, r)
            p.execute()
            self.queued = 0
            self.records = []
            self.certificates = []
            self.retries = 0
        except redis.ConnectionError as e:
            time.sleep(1.0)
            self.retries += 1
            if self.retries > self.MAX_RETRIES or noretry:
                msg = "redis connection error: %s" % str(e)
                self.logger.fatal(msg)
                self.redis = None

    def take(self, pbout):
        self.records.append(pbout.transformed)
        self.certificates.extend(pbout.certificates)
        self.queued += (len(pbout.certificates) + 1)
        if self.queued > self.BATCH_SIZE:
            self.push()

    def cleanup(self):
        return self.push(noretry=True)


class Kafka(Outgoing):

    def __init__(self, logger=None, destination=None, *args, **kwargs):
        from kafka import KafkaProducer
        if destination == "full_ipv4":
            self.topic = "ipv4"
        elif destination == "alexa_top1mil":
            self.topic = "domain"
        else:
            raise Exception("invalid destination: %s" % destination)
        host = os.environ.get('KAFKA_BOOTSTRAP_HOST', 'localhost:9092')
        self.main_producer = KafkaProducer(bootstrap_servers=host)
        self.cert_producer = KafkaProducer(bootstrap_servers=host)

    def take(self, pbout):
        for certificate in pbout.certificates:
            self.cert_producer.send("certificate", certificate)
        self.main_producer.send(self.topic, pbout.transformed)

    def cleanup(self):
        if self.main_producer:
            self.main_producer.flush()
        if self.cert_producer:
            self.cert_producer.flush()

failed_msg_t = collections.namedtuple('failed_msg_t', 'topic msg attempt')

class PubsubState():
    '''
    Hold state with single course-grained lock. Restrict to safe operations
    on shared memory.
    '''

    def __init__(self):
        self._lock = threading.Lock()
        self._npending_msgs = 0
        self._failed_msgs = []

        # An individual thread raising an exception or calling
        # sys.exit() will only end that thread. Use this to
        # signal the rest of the threads to exit.
        self.exit_exception = None

    def inc_npending(self):
        self._lock.acquire()
        self._npending_msgs += 1
        self._lock.release()

    def dec_npending(self):
        self._lock.acquire()
        self._npending_msgs -= 1
        self._lock.release()

    def get_npending(self):
        '''
        No lock required to simply read int; no direct writes allowed.
        '''
        return self._npending_msgs

    def add_failed_msg(self, topic, msg, attempt):
        self._lock.acquire()
        self._failed_msgs.append(failed_msg_t(topic, msg, attempt))
        self._lock.release()

    def retrieve_failed_msgs(self):
        '''
        Retrieve list of failed messages and reset running list. Returned
        value is no longer shared data.
        '''
        self._lock.acquire()
        retval = self._failed_msgs
        self._failed_msgs = []
        self._lock.release()
        return retval

class Pubsub(Outgoing):

    MAX_ATTEMPTS = 5

    def __init__(self, logger=None, destination=None, *args, **kwargs):
        import google
        from google.cloud import pubsub, pubsub_v1
        self.logger = logger
        if logger is None:
            self.logger = logging.getLogger('null-logger')
            self.logger.setLevel(9999)
        if destination == "full_ipv4":
            self.topic_url = os.environ.get('PUBSUB_IPV4_TOPIC_URL')
        elif destination == "alexa_top1mil":
            self.topic_url = os.environ.get('PUBSUB_ALEXA_TOPIC_URL')
        self.cert_topic_url = os.environ.get('PUBSUB_CERT_TOPIC_URL')
        if not self.topic_url:
            raise Exception('missing $PUBSUB_[IPV4|ALEXA]_TOPIC_URL')
        if not self.cert_topic_url:
            raise Exception('missing $PUBSUB_CERT_TOPIC_URL')
        batch_settings = pubsub_v1.types.BatchSettings(
            # "The entire request including one or more messages must
            #  be smaller than 10MB, after decoding."
            max_bytes=8192000,  # 8 MB
            max_latency=15,     # 15 seconds
        )
        self.publisher = pubsub.PublisherClient(batch_settings)
        self.publish_count = {}
        try:
            self.publisher.get_topic(self.topic_url)
            self.publisher.get_topic(self.cert_topic_url)
        except google.api_core.exceptions.GoogleAPICallError as e:
            logger.error(e.message)
            raise
        self._state = PubsubState()

    def _make_done_callback(self, topic, data, attempt):
        def done_callback(future):
            if self._state.exit_exception:
                sys.exit(1)
            exception = future.exception()
            if not exception:
                self.logger.debug("Publish attempt #{attempt}/{max} on topic '{topic}' "
                                  "succeeded.".format(attempt=attempt + 1,
                                                      max=self.MAX_ATTEMPTS,
                                                      topic=topic))
                self._state.dec_npending()
            else:
                self.logger.error("Publish attempt #{attempt}/{max} failed for data '{data}' on"
                                  "topic '{topic}' {error}"
                                  .format(attempt=attempt + 1,
                                          max=self.MAX_ATTEMPTS,
                                          data=data,
                                          topic=topic,
                                          error=str(exception)))
                if attempt >= self.MAX_ATTEMPTS:
                    self._state.exit_exception = exception
                    sys.exit(1)
                self._state.add_failed_msg(topic, data, attempt + 1)

        return done_callback

    def _publish_with_callback(self, topic, data, attempt):
        if attempt == 0:
            self._state.inc_npending()
        cb = self._make_done_callback(topic, data, attempt)
        publish_future = self.publisher.publish(topic, data)
        publish_future.add_done_callback(cb)

    def take(self, pbout):
        for certificate in pbout.certificates:
            self._publish_with_callback(self.cert_topic_url, certificate, 0)
        self._publish_with_callback(self.topic_url, pbout.transformed, 0)

    def cleanup(self):
        while self._state.get_npending() > 0:
            time.sleep(10)
            if self._state.exit_exception:
                self.logger.error("Max attempts exceeded; raising most recent exception.")
                raise self._state.exit_exception
            failed_msgs = self._state.retrieve_failed_msgs()
            self.logger.debug("Failed message queuelen: {}, "
                              "messages pending: {}"
                              .format(len(failed_msgs),
                                      self._state.get_npending()))
            for failed in failed_msgs:
                self._publish_with_callback(failed.topic, failed.msg,
                                            failed.attempt + 1)
        self.logger.debug("Pubsub cleanup: Finished.")