Python kafka.KafkaClient() Examples

The following are 25 code examples of kafka.KafkaClient(), taken from open-source projects. The originating project and source file are noted above each example. You may also want to check out the other available functions and classes of the kafka module.
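
As a quick orientation, here is a minimal, hedged sketch of constructing a client with the older kafka-python API used throughout these examples; the broker address is a placeholder:

from kafka import KafkaClient

# Placeholder broker address; substitute your own broker list.
client = KafkaClient('localhost:9092')
print(client.topic_partitions)  # topic -> partition metadata loaded on connect
client.close()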
Example #1
Source File: main.py    From kafka-utils with Apache License 2.0
def get_partition_leaders(cluster_config):
    """Return the current leaders of all partitions. Partitions are
    returned as a "topic-partition" string.

    :param cluster_config: the cluster
    :type cluster_config: kafka_utils.utils.config.ClusterConfig
    :returns: leaders for partitions
    :rtype: map of ("topic-partition", broker_id) pairs
    """
    client = KafkaClient(cluster_config.broker_list)
    result = {}
    for topic, topic_data in six.iteritems(client.topic_partitions):
        for partition, p_data in six.iteritems(topic_data):
            topic_partition = topic + "-" + str(partition)
            result[topic_partition] = p_data.leader
    return result 
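
A hedged usage sketch for the function above; the fake config class is purely illustrative and stands in for kafka_utils' ClusterConfig, since only the broker_list attribute is used here:

# Illustrative stand-in for a ClusterConfig; only broker_list is needed here.
class FakeClusterConfig(object):
    broker_list = 'localhost:9092'  # placeholder broker

leaders = get_partition_leaders(FakeClusterConfig())
for topic_partition, broker_id in sorted(leaders.items()):
    print(topic_partition, broker_id)  # e.g. "my_topic-0 1"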
Example #2
Source File: test_consumer.py    From yelp_kafka with Apache License 2.0
def test_commit_message_zk(self, config):
        if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None:
            return

        with mock_kafka() as (mock_client, mock_consumer):
            config._config['offset_storage'] = 'zookeeper'
            consumer = KafkaSimpleConsumer('test_topic', config)
            consumer.connect()

            actual = consumer.commit_message(
                Message(0, 100, 'mykey', 'myvalue'),
            )

            assert actual is True
            mock_client.return_value.send_offset_commit_request \
                .assert_called_once_with(
                    'test_group'.encode(),
                    [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
                ) 
Example #3
Source File: test_consumer.py    From yelp_kafka with Apache License 2.0
def test_commit_message_kafka(self, config):
        if getattr(KafkaClient, 'send_offset_commit_request_kafka', None) is None:
            return

        with mock_kafka() as (mock_client, mock_consumer):
            config._config['offset_storage'] = 'kafka'
            consumer = KafkaSimpleConsumer('test_topic', config)
            consumer.connect()

            actual = consumer.commit_message(
                Message(0, 100, 'mykey', 'myvalue'),
            )

            assert actual is True
            assert not mock_client.return_value.send_offset_commit_request.called
            mock_client.return_value.send_offset_commit_request_kafka \
                .assert_called_once_with(
                    'test_group'.encode(),
                    [OffsetCommitRequest('test_topic'.encode(), 0, 100, None)],
                ) 
Example #4
Source File: discovery.py    From yelp_kafka with Apache License 2.0
def discover_topics(cluster):
    """Get all the topics in a cluster

    :param cluster: config of the cluster to get topics from
    :type cluster: ClusterConfig
    :returns: a dict <topic>: <[partitions]>
    :raises DiscoveryError: upon failure to request topics from kafka
    """
    client = KafkaClient(cluster.broker_list)
    try:
        topics = get_kafka_topics(client)
        return dict([(topic.decode(), partitions) for topic, partitions in six.iteritems(topics)])
    except:
        log.exception(
            "Topics discovery failed for %s",
            cluster.broker_list
        )
        raise DiscoveryError("Failed to get topics information from "
                             "{cluster}".format(cluster=cluster)) 
Example #5
Source File: discovery.py    From yelp_kafka with Apache License 2.0
def get_kafka_connection(cluster_type, client_id, **kwargs):
    """Get a kafka connection for the local region kafka cluster at Yelp.

    :param cluster_type: kafka cluster type (e.g. 'scribe' or 'standard').
    :type cluster_type: string
    :param client_id: client_id to be used to connect to kafka.
    :type client_id: string
    :param kwargs: parameters to pass along when creating the KafkaClient instance.
    :returns: KafkaClient
    :raises DiscoveryError: :py:class:`yelp_kafka.error.DiscoveryError` upon failure connecting to a cluster.
    """
    cluster = get_region_cluster(cluster_type, client_id)
    try:
        return KafkaClient(cluster.broker_list, client_id=client_id, **kwargs)
    except:
        log.exception(
            "Connection to kafka cluster %s using broker list %s failed",
            cluster.name,
            cluster.broker_list
        )
        raise DiscoveryError("Failed to connect to cluster {0}".format(
            cluster.name)) 
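
A hedged usage sketch; this only works in an environment where yelp_kafka's region discovery is configured, and the cluster type, client id, and timeout are illustrative:

client = get_kafka_connection('standard', client_id='my-service', timeout=10)
try:
    print(client.brokers)  # brokers the client managed to connect to
finally:
    client.close()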
Example #6
Source File: consumer.py    From yelp_kafka with Apache License 2.0
def connect(self):
        """ Connect to kafka and create a consumer.
        It uses config parameters to create a kafka-python
        KafkaClient and SimpleConsumer.
        """
        # Instantiate a kafka client connected to kafka.
        self.client = KafkaClient(
            self.config.broker_list,
            client_id=self.config.client_id
        )

        # Create a kafka SimpleConsumer.
        self.kafka_consumer = SimpleConsumer(
            client=self.client, topic=self.topic, partitions=self.partitions,
            **self.config.get_simple_consumer_args()
        )
        self.log.debug(
            "Connected to kafka. Topic %s, partitions %s, %s",
            self.topic,
            self.partitions,
            ','.join(['{0} {1}'.format(k, v) for k, v in
                      six.iteritems(self.config.get_simple_consumer_args())])
        )
        self.kafka_consumer.provide_partition_info() 
Example #7
Source File: redis-monitor.py    From openslack-crawler with Apache License 2.0
def setup(self):
        '''
        Connection stuff here so we can mock it
        '''
        self.redis_conn = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)

        # set up kafka
        self.kafka_conn = KafkaClient(KAFKA_HOSTS)
        self.producer = SimpleProducer(self.kafka_conn)
        self.topic_prefix = KAFKA_TOPIC_PREFIX 
Example #8
Source File: context.py    From panoptes with Apache License 2.0
def _get_kafka_client(self):
        """
        Create and return a Kafka Client

        Returns:
            KafkaClient: The created Kafka client

        Raises:
            PanoptesContextError: Passes through any exceptions that happen in trying to create the Kafka client
        """
        # The logic of the weird check that follows is this: KafkaClient initialization can fail if there is a problem
        # connecting with even one broker. What we want to do is: succeed if the client was able to connect to even one
        # broker. So, we catch the exception and pass it through - and then check the number of brokers connected to the
        # client in the next statement (if not kafka_client.brokers) and fail if the client is not connected to any
        # broker
        self.__logger.info(u'Attempting to connect Kafka')
        config = self.__config
        kafka_client = None
        try:
            kafka_client = KafkaClient(config.kafka_brokers)
        except ConnectionError:
            pass

        if not kafka_client.brokers:
            raise PanoptesContextError(u'Could not connect to any Kafka broker from this list: %s'
                                       % config.kafka_brokers)
        self.__logger.info(u'Successfully connected to Kafka brokers: %s' % kafka_client.brokers)

        return kafka_client 
Example #9
Source File: containers_test.py    From data_pipeline with Apache License 2.0
def test_get_kafka_connection(containers):
    """
    Asserts that the method returns a working kafka client connection.
    """
    kafka_connection = containers.get_kafka_connection(timeout_seconds=1)
    assert isinstance(kafka_connection, KafkaClient) 
Example #10
Source File: kafka_docker.py    From data_pipeline with Apache License 2.0
def setup_capture_new_messages_consumer(topic):
    """Seeks to the tail of the topic then returns a function that can
    consume messages from that point.
    """
    kafka = KafkaClient(get_config().cluster_config.broker_list)
    group = str('data_pipeline_clientlib_test')
    consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=_ONE_MEGABYTE)
    consumer.seek(0, 2)  # seek to tail, 0 is the offset, and 2 is the tail

    yield consumer

    kafka.close() 
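
Note that the function above yields the consumer and closes the client afterwards, so in the source module it is presumably wrapped with contextlib.contextmanager. A hedged usage sketch under that assumption, with a placeholder topic:

# Assumption: the original module decorates the function with
# contextlib.contextmanager, allowing it to be used in a `with` block.
with setup_capture_new_messages_consumer('my_topic') as consumer:
    for message in consumer.get_messages(count=10, timeout=5):
        print(message.offset, message.message.value)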
Example #11
Source File: _kafka_producer.py    From data_pipeline with Apache License 2.0
def __init__(self, producer_position_callback, dry_run=False):
        self.producer_position_callback = producer_position_callback
        self.dry_run = dry_run
        self.kafka_client = KafkaClient(get_config().cluster_config.broker_list)
        self.position_data_tracker = PositionDataTracker()
        self._reset_message_buffer()
        self.skip_messages_with_pii = get_config().skip_messages_with_pii
        self._publish_retry_policy = RetryPolicy(
            ExpBackoffPolicy(with_jitter=True),
            max_retry_count=get_config().producer_max_publish_retry_count
        )
        self._automatic_flush_enabled = True 
Example #12
Source File: tailer.py    From data_pipeline with Apache License 2.0
def _configure_tools(self):
        load_default_config(
            self.options.config_file,
            self.options.env_config_file
        )

        # We setup logging 'early' since we want it available for setup_topics
        self._setup_logging()

        self.kafka_client = KafkaClient(get_config().cluster_config.broker_list)

        self._setup_topics()
        if len(self.topic_to_offsets_map) == 0:
            self.option_parser.error("At least one topic must be specified.")

        if self.options.start_timestamp is not None and self.options.start_timestamp >= int(time.time()):
            self.option_parser.error("--start-timestamp should not be later than current time")

        if self.options.start_timestamp is not None and self.options.end_timestamp and (
            self.options.start_timestamp > self.options.end_timestamp
        ):
            self.option_parser.error("--end-timestamp must not be smaller than --start-timestamp")

        if self.options.all_fields:
            self.options.fields = self._public_message_field_names

        self._verify_offset_ranges() 
Example #13
Source File: base_command.py    From data_pipeline with Apache License 2.0
def _kafka_client(self):
        kafka_client = KafkaClient(self.config.cluster_config.broker_list)
        try:
            yield kafka_client
        finally:
            kafka_client.close() 
Example #14
Source File: base_consumer.py    From data_pipeline with Apache License 2.0
def kafka_client(self):
        """ Returns the `KafkaClient` object."""
        return KafkaClient(self._region_cluster_config.broker_list) 
Example #15
Source File: client.py    From kzmonitor with MIT License
def __init__(self, broker):
        self.broker = broker
        self.client = KafkaClient(broker, timeout=3) 
Example #16
Source File: consumer.py    From yelp_kafka with Apache License 2.0
def close(self):
        """Disconnect from kafka.
        If auto_commit is enabled, commit offsets before disconnecting.
        """
        if self.kafka_consumer.auto_commit is True:
            try:
                self.commit()
            except:
                self.log.exception("Commit error. "
                                   "Offsets may not have been committed")
        # Close all the connections to kafka brokers. KafkaClient opens
        # connections to all the partition leaders.
        self.client.close() 
Example #17
Source File: discovery.py    From yelp_kafka with Apache License 2.0
def get_all_kafka_connections(cluster_type, client_id, **kwargs):
    """Get a kafka connection for each available kafka cluster at Yelp.

    :param cluster_type: kafka cluster type (e.g. 'scribe' or 'standard').
    :type cluster_type: string
    :param client_id: client_id to be used to connect to kafka.
    :type client_id: string
    :param kwargs: parameters to pass along when creating the KafkaClient instance.
    :returns: list (cluster_name, KafkaClient)
    :raises DiscoveryError: :py:class:`yelp_kafka.error.DiscoveryError` upon failure connecting to a cluster.

    .. note:: This function creates a KafkaClient for each cluster in a region and tries to connect to it. If any cluster is unavailable, it closes all previously opened connections and raises DiscoveryError.
    """

    clusters = get_all_clusters(cluster_type, client_id)
    connected_clusters = []
    for cluster in clusters:
        try:
            client = KafkaClient(cluster.broker_list, client_id=client_id, **kwargs)
            connected_clusters.append((cluster.name, client))
        except:
            log.exception(
                "Connection to kafka cluster %s using broker list %s failed",
                cluster.name,
                cluster.broker_list
            )
            for _, client in connected_clusters:
                client.close()
            raise DiscoveryError("Failed to connect to cluster {0}".format(
                cluster.name))
    return connected_clusters 
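
A hedged usage sketch; as with the single-cluster variant, this assumes Yelp's region discovery is configured, and the cluster type and client id are placeholders:

connections = get_all_kafka_connections('standard', client_id='my-service')
try:
    for cluster_name, client in connections:
        print(cluster_name, len(client.brokers))
finally:
    for _, client in connections:
        client.close()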
Example #18
Source File: test_consumer.py    From yelp_kafka with Apache License 2.0
def mock_kafka():
    with mock.patch('yelp_kafka.consumer.KafkaClient', autospec=True) as mock_client:
        with mock.patch('yelp_kafka.consumer.SimpleConsumer', autospec=True) as mock_consumer:
            mock_consumer.return_value.auto_commit = True
            yield mock_client, mock_consumer 
Example #19
Source File: test_consumer.py    From yelp_kafka with Apache License 2.0
def test_simple_consumer():
    topic = create_random_topic(1, 1)

    messages = [str(i).encode("UTF-8") for i in range(100)]

    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    producer.send_messages(topic, *messages)

    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        auto_commit=False,
        consumer_timeout_ms=1000
    )
    consumer = KafkaSimpleConsumer(topic, config)

    with consumer:
        for expected_offset in range(100):
            message = consumer.get_message()
            assert message.offset == expected_offset
            assert message.partition == 0
            assert message.value == str(expected_offset).encode("UTF-8") 
Example #20
Source File: api.py    From flasfka with MIT License
def get_kafka_client():
    if not hasattr(flask.g, "kafka_client"):
        flask.g.kafka_client = kafka.KafkaClient(app.config["HOSTS"])
    return flask.g.kafka_client 
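
A hedged sketch of the cached client in use inside a request handler; the route, topic, and payload are placeholders, and SimpleProducer comes from the same kafka package:

import kafka

@app.route("/ping")
def ping():
    # Reuses the client cached on flask.g for the current request context.
    producer = kafka.SimpleProducer(get_kafka_client())
    producer.send_messages(b"heartbeat", b"ping")
    return "ok"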
Example #21
Source File: kafka_random_reader.py    From exporters with BSD 3-Clause "New" or "Revised" License
def __init__(self, *args, **kwargs):
        import kafka
        super(KafkaRandomReader, self).__init__(*args, **kwargs)
        brokers = self.read_option('brokers')
        group = self.read_option('group')
        topic = self.read_option('topic')

        client = kafka.KafkaClient(map(bytes, brokers))

        # TODO: Remove this comments when next steps are decided.
        # If resume is set to true, then child should not load initial offsets
        # child_loads_initial_offsets = False if settings.get('RESUME') else True

        # self.consumer = kafka.MultiProcessConsumer(client, group, topic, num_procs=1,
        #                                             child_loads_initial_offsets=child_loads_initial_offsets,
        #                                             auto_commit=False)

        self.consumer = kafka.SimpleConsumer(client, group, topic,
                                             auto_commit=False)

        self.decompress_fun = zlib.decompress
        self.processor = self.create_processor()
        self.partitions = client.get_partition_ids_for_topic(topic)

        self.logger.info(
            'KafkaRandomReader has been initiated. '
            'Topic: {}. Group: {}'.format(self.read_option('topic'), self.read_option('group')))

        self.logger.info('Running random sampling')
        self._reservoir = self.fill_reservoir()
        self.logger.info('Random sampling completed, ready to process batches') 
Example #22
Source File: tweet_sampler.py    From straw with MIT License
def __init__(self, APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET, host, port):
        super(KafkaStrawStreamer, self).__init__(APP_KEY, APP_SECRET,OAUTH_TOKEN, OAUTH_TOKEN_SECRET)

        # connect to Kafka
        print("Connecting to Kafka node {0}:{1}".format(host, port))
        kafka = KafkaClient("{0}:{1}".format(host, port))
        self.producer = BufferedSimpleProducer(kafka, 100) 
Example #23
Source File: context.py    From panoptes with Apache License 2.0
def kafka_client(self):
        """
        A Kafka client

        Returns:
            KafkaClient

        """
        return self._kafka_client 
Example #24
Source File: test_consumer.py    From yelp_kafka with Apache License 2.0
def run_kafka_consumer_group_test(num_consumers, num_partitions):
    topic = create_random_topic(1, num_partitions)
    cluster_config = ClusterConfig(None, None, [KAFKA_URL], ZOOKEEPER_URL)
    config = KafkaConsumerConfig(
        'test',
        cluster_config,
        auto_offset_reset='smallest',
        partitioner_cooldown=5,
        auto_commit_interval_messages=1,
    )

    queue = Queue()

    def create_consumer():
        def consume():
            consumer = KafkaConsumerGroup([topic], config)
            with consumer:
                while True:
                    try:
                        message = consumer.next()
                        queue.put(message)
                        consumer.task_done(message)
                    except ConsumerTimeout:
                        return

        p = Process(target=consume)
        p.daemon = True
        return p

    consumer_processes = [create_consumer() for _ in range(num_consumers)]

    for consumer_process in consumer_processes:
        consumer_process.start()

    producer = YelpKafkaSimpleProducer(
        cluster_config=cluster_config,
        report_metrics=False,
        client=KafkaClient(KAFKA_URL),
    )
    for i in range(100):
        producer.send_messages(topic, str(i).encode("UTF-8"))

    # wait until all 100 messages have been consumed
    while queue.qsize() < 100:
        time.sleep(0.1)

    received_messages = []
    while True:
        try:
            message = queue.get(block=True, timeout=0.5)
        except Empty:
            break
        received_messages.append(int(message.value))

    assert [i for i in range(100)] == sorted(received_messages) 
Example #25
Source File: straw_app.py    From straw with MIT License
def __init__(self, config):
    
        app = Flask(__name__)
        app.secret_key = 'i love to search full text in real time'

        # attach a redis connection pool
        app.pool = redis.ConnectionPool(host="localhost", port=6379)

        # user -> channels mapping
        app.user_channels = {}

        # how to handle messages that enter the stream from redis pub sub
        def redis_message_handler(msg):
            redis_connection = redis.Redis(connection_pool=app.pool)
            # get channel and content of incoming message
            channel = msg['channel']
            data = msg['data']

            # word highlighting -- TODO: this would be better to do in the search engine!
            query = redis_connection.get(channel)
            words = list(set(query.split(" ")))
            for w in words:
                data=data.lower().replace(w.lower(), highlight(w.lower()))

            # find users subscribed to this channel
            if app.user_channels.get(channel) is not None:
                for user in app.user_channels.get(channel):
                    redis_connection.lpush(user, data)
            else:
                # no more users for this channel, unsubscribe from it
                redis_connection.unsubscribe(channel)            
            
        # Add Redis query subscriber to app
        app.disp = []
        app.subscriber = QuerySubscriber("localhost", 6379, redis_message_handler)

        # setup kafka producer in the app
        kafka = KafkaClient("{0}:{1}".format(config["zookeeper_host"], 9092))
        app.producer = SimpleProducer(kafka)

        # add the app
        self.app = app