kafka.common.TopicAndPartition Scala Examples

The following examples show how to use kafka.common.TopicAndPartition. Each example is taken from an open-source project; the project and source file are noted in the header above each example.
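
As a quick orientation, this is a minimal sketch (not taken from any of the projects below) showing what TopicAndPartition is: a small case class pairing a topic name with a partition id, almost always used as a map key.

import kafka.common.TopicAndPartition

object TopicAndPartitionSketch extends App {
  // Construct a TopicAndPartition and read its two fields.
  val tp = TopicAndPartition("my_topic", 0)
  println(s"topic=${tp.topic}, partition=${tp.partition}")

  // The typical usage pattern in the examples below: partition -> offset maps.
  val offsets: Map[TopicAndPartition, Long] = Map(tp -> 42L)
  println(offsets(tp))
}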
Example 1
Source File: KafkaClient.scala    From incubator-retired-gearpump   with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.util

import kafka.admin.AdminUtils
import kafka.cluster.Broker
import kafka.common.TopicAndPartition
import kafka.consumer.SimpleConsumer
import kafka.utils.{ZKStringSerializer, ZkUtils}
import org.I0Itec.zkclient.ZkClient
import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.apache.gearpump.util.LogUtil
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.Serializer

object KafkaClient {
  private val LOG = LogUtil.getLogger(classOf[KafkaClient])

  val factory = new KafkaClientFactory

  class KafkaClientFactory extends java.io.Serializable {
    def getKafkaClient(config: KafkaConfig): KafkaClient = {
      val consumerConfig = config.getConsumerConfig
      val zkClient = new ZkClient(consumerConfig.zkConnect, consumerConfig.zkSessionTimeoutMs,
        consumerConfig.zkConnectionTimeoutMs, ZKStringSerializer)
      new KafkaClient(config, zkClient)
    }
  }
}

class KafkaClient(config: KafkaConfig, zkClient: ZkClient) {
  import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient._

  private val consumerConfig = config.getConsumerConfig

  def getTopicAndPartitions(consumerTopics: List[String]): Array[TopicAndPartition] = {
    try {
      ZkUtils.getPartitionsForTopics(zkClient, consumerTopics).flatMap {
        case (topic, partitions) => partitions.map(TopicAndPartition(topic, _))
      }.toArray
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def getBroker(topic: String, partition: Int): Broker = {
    try {
      val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition)
        .getOrElse(throw new RuntimeException(
          s"leader not available for TopicAndPartition($topic, $partition)"))
      ZkUtils.getBrokerInfo(zkClient, leader)
        .getOrElse(throw new RuntimeException(s"broker info not found for leader $leader"))
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def createConsumer(topic: String, partition: Int, startOffsetTime: Long): KafkaConsumer = {
    val broker = getBroker(topic, partition)
    val soTimeout = consumerConfig.socketTimeoutMs
    val soBufferSize = consumerConfig.socketReceiveBufferBytes
    val clientId = consumerConfig.clientId
    val fetchSize = consumerConfig.fetchMessageMaxBytes
    val consumer = new SimpleConsumer(broker.host, broker.port, soTimeout, soBufferSize, clientId)
    KafkaConsumer(topic, partition, startOffsetTime, fetchSize, consumer)
  }

  def createProducer[K, V](keySerializer: Serializer[K],
      valueSerializer: Serializer[V]): KafkaProducer[K, V] = {
    new KafkaProducer[K, V](config.getProducerConfig, keySerializer, valueSerializer)
  }

  // Returns true if the topic already exists; otherwise creates it and returns false.
  def createTopic(topic: String, partitions: Int, replicas: Int): Boolean = {
    try {
      if (AdminUtils.topicExists(zkClient, topic)) {
        LOG.info(s"topic $topic exists")
        true
      } else {
        AdminUtils.createTopic(zkClient, topic, partitions, replicas)
        LOG.info(s"created topic $topic")
        false
      }
    } catch {
      case e: Exception =>
        LOG.error(e.getMessage)
        throw e
    }
  }

  def close(): Unit = {
    zkClient.close()
  }
} 
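
A hedged usage sketch of the client above; the KafkaConfig value is assumed to be configured elsewhere and is not part of the gearpump source shown here.

import kafka.common.TopicAndPartition
import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient
import org.apache.gearpump.streaming.kafka.util.KafkaConfig

// Usage sketch, assuming `kafkaConfig: KafkaConfig` has been built with valid ZooKeeper settings.
def listPartitions(kafkaConfig: KafkaConfig): Unit = {
  val client = KafkaClient.factory.getKafkaClient(kafkaConfig)
  try {
    val tps: Array[TopicAndPartition] = client.getTopicAndPartitions(List("my_topic"))
    tps.foreach(tp => println(s"${tp.topic} -> partition ${tp.partition}"))
  } finally {
    client.close()
  }
}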
Example 2
Source File: KafkaProvider.scala    From bandar-log   with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import com.aol.one.dwh.bandarlog.metrics.AtomicValue
import com.aol.one.dwh.infra.config.Topic
import kafka.common.TopicAndPartition

  // NOTE: the enclosing provider class declaration is elided in this excerpt; only the
  // private lag helper and the closing brace are shown.
  private def getLag(heads: Map[TopicAndPartition, Long], offsets: Map[TopicAndPartition, Long]): Long = {
    if (offsets.keySet.isEmpty) {
      heads.values.sum
    } else {
      val lags = heads.map { case (key, _) =>
        val lagValue = heads(key) - offsets.getOrElse(key, 0L)
        lagValue.max(0)
      }
      lags.sum
    }
  }
} 
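
The lag computation above reads as: for each partition, subtract the committed consumer offset from the head offset, clamp at zero, and sum; if no consumer offsets are known, the lag is the sum of all heads. A standalone sketch with the same semantics (a hypothetical helper, not part of bandar-log):

import kafka.common.TopicAndPartition

// Hypothetical standalone equivalent of the private getLag above.
def lag(heads: Map[TopicAndPartition, Long], offsets: Map[TopicAndPartition, Long]): Long =
  if (offsets.isEmpty) heads.values.sum
  else heads.map { case (tp, head) => (head - offsets.getOrElse(tp, 0L)).max(0L) }.sum

// e.g. heads = Map(tp1 -> 4L, tp2 -> 5L), offsets = Map(tp1 -> 1L) gives (4-1) + (5-0) = 8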
Example 3
Source File: KafkaConsumer.scala    From Swallow   with Apache License 2.0
package com.intel.hibench.common.streaming.metrics

import java.util.Properties

import kafka.api.{OffsetRequest, FetchRequestBuilder}
import kafka.common.ErrorMapping._
import kafka.common.TopicAndPartition
import kafka.consumer.{ConsumerConfig, SimpleConsumer}
import kafka.message.MessageAndOffset
import kafka.utils.{ZKStringSerializer, ZkUtils, Utils}
import org.I0Itec.zkclient.ZkClient

class KafkaConsumer(zookeeperConnect: String, topic: String, partition: Int) {

  private val CLIENT_ID = "metrics_reader"
  private val props = new Properties()
  props.put("zookeeper.connect", zookeeperConnect)
  props.put("group.id", CLIENT_ID)
  private val config = new ConsumerConfig(props)
  private val consumer = createConsumer

  private val earliestOffset = consumer
      .earliestOrLatestOffset(TopicAndPartition(topic, partition), OffsetRequest.EarliestTime, -1)
  private var nextOffset: Long = earliestOffset
  private var iterator: Iterator[MessageAndOffset] = getIterator(nextOffset)

  def next(): Array[Byte] = {
    val mo = iterator.next()
    val message = mo.message

    nextOffset = mo.nextOffset

    Utils.readBytes(message.payload)
  }

  def hasNext: Boolean = {
    @annotation.tailrec
    def hasNextHelper(iter: Iterator[MessageAndOffset], newIterator: Boolean): Boolean = {
      if (iter.hasNext) true
      else if (newIterator) false
      else {
        iterator = getIterator(nextOffset)
        hasNextHelper(iterator, newIterator = true)
      }
    }
    hasNextHelper(iterator, newIterator = false)
  }

  def close(): Unit = {
    consumer.close()
  }

  private def createConsumer: SimpleConsumer = {
    val zkClient = new ZkClient(zookeeperConnect, 6000, 6000, ZKStringSerializer)
    try {
      val leader = ZkUtils.getLeaderForPartition(zkClient, topic, partition)
          .getOrElse(throw new RuntimeException(
            s"leader not available for TopicAndPartition($topic, $partition)"))
      val broker = ZkUtils.getBrokerInfo(zkClient, leader)
          .getOrElse(throw new RuntimeException(s"broker info not found for leader $leader"))
      new SimpleConsumer(broker.host, broker.port,
        config.socketTimeoutMs, config.socketReceiveBufferBytes, CLIENT_ID)
    } catch {
      case e: Exception =>
        throw e
    } finally {
      zkClient.close()
    }
  }

  private def getIterator(offset: Long): Iterator[MessageAndOffset] = {
    val request = new FetchRequestBuilder()
        .addFetch(topic, partition, offset, config.fetchMessageMaxBytes)
        .build()

    val response = consumer.fetch(request)
    response.errorCode(topic, partition) match {
      case NoError => response.messageSet(topic, partition).iterator
      case error => throw exceptionFor(error)
    }
  }
} 
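
A hedged usage sketch of the consumer above; the ZooKeeper connect string, topic, and partition are placeholders.

import com.intel.hibench.common.streaming.metrics.KafkaConsumer

// Usage sketch, assuming a Kafka 0.8-era broker registered in ZooKeeper at localhost:2181.
val consumer = new KafkaConsumer("localhost:2181", "metrics_topic", 0)
try {
  while (consumer.hasNext) {
    println(new String(consumer.next(), "UTF-8"))
  }
} finally {
  consumer.close()
}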
Example 4
Source File: KafkaClusterSuite.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {
    val group = "kcsuitegroup" + Random.nextInt(10000)

    val offset = Random.nextInt(10000)

    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
} 
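
Outside a test harness, the KafkaCluster calls exercised above look roughly like the sketch below. The broker address is a placeholder, and note that in some Spark versions KafkaCluster is private[spark], so such code must live under the org.apache.spark.streaming.kafka package.

import kafka.common.TopicAndPartition

// Standalone sketch of the Either-based KafkaCluster API used in the suite above.
val kc = new KafkaCluster(Map("metadata.broker.list" -> "localhost:9092"))
val tp = TopicAndPartition("my_topic", 0)

kc.getLatestLeaderOffsets(Set(tp)) match {
  case Right(offsets) => println(s"latest offset for $tp: ${offsets(tp).offset}")
  case Left(errs)     => println(s"could not fetch leader offsets: $errs")
}

kc.getConsumerOffsets("my_group", Set(tp)) match {
  case Right(committed) => println(s"committed offset: ${committed(tp)}")
  case Left(errs)       => println(s"could not fetch consumer offsets: $errs")
}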
Example 5
Source File: KafkaClusterSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {
    val group = "kcsuitegroup" + Random.nextInt(10000)

    val offset = Random.nextInt(10000)

    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
} 
Example 6
Source File: KafkaClusterSuite.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {//元数据API
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {//指挥者偏移API
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {//消费者偏移API
    val group = "kcsuitegroup" + Random.nextInt(10000)

    val offset = Random.nextInt(10000)

    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
} 
Example 7
Source File: KafkaClusterSuite.scala    From iolap   with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {
    val group = "kcsuitegroup" + Random.nextInt(10000)

    val offset = Random.nextInt(10000)

    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
} 
Example 8
Source File: KafkaClusterSuite.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {
    val group = "kcsuitegroup" + Random.nextInt(10000)

    val offset = Random.nextInt(10000)

    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
} 
Example 9
Source File: KafkaInMessagesProviderTest.scala    From bandar-log   with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import com.aol.one.dwh.infra.config.Topic
import kafka.common.TopicAndPartition
import org.mockito.Mockito.when
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar


class KafkaInMessagesProviderTest extends FunSuite with MockitoSugar {

  private val kafkaConnector = mock[KafkaConnector]
  private val topic = Topic("topic_id", Set("topic_1", "topic_2"), "group_id")

  test("check count of in messages/heads over all topic partitions") {
    val heads = Some(Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    ))
    when(kafkaConnector.getHeads(topic)).thenReturn(heads)

    val result = new KafkaInMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 6) // 1 + 2 + 3
  }

  test("check count of in messages/heads for empty heads result") {
    when(kafkaConnector.getHeads(topic)).thenReturn(Some(Map[TopicAndPartition, Long]()))

    val result = new KafkaInMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0)
  }

  test("return none if can't retrieve heads") {
    when(kafkaConnector.getHeads(topic)).thenReturn(None)

    val result = new KafkaInMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.isEmpty)
  }
} 
Example 10
Source File: KafkaLagProviderTest.scala    From bandar-log   with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.infra.config.Topic
import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import kafka.common.TopicAndPartition
import org.mockito.Mockito.when
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar


class KafkaLagProviderTest extends FunSuite with MockitoSugar {

  private val kafkaConnector = mock[KafkaConnector]
  private val topic = Topic("topic_id", Set("topic_1", "topic_2", "topic_3"), "group_id")

  test("check lag per topic") {
    val heads = Map(
      TopicAndPartition("topic_1", 1) -> 4L,
      TopicAndPartition("topic_2", 2) -> 5L,
      TopicAndPartition("topic_3", 3) -> 6L
    )

    val offsets = Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    )
    val kafkaState = Option((heads, offsets))
    when(kafkaConnector.getKafkaState(topic)).thenReturn(kafkaState)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    // topic       partition  heads  offsets  lag
    // topic_1     1          4      1        4-1=3
    // topic_2     2          5      2        5-2=3
    // topic_3     3          6      3        6-3=3
    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 9) // lag sum 3 + 3 + 3
  }

  test("check 0 lag case per topic") {
    val heads = Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    )

    val offsets = Map(
      TopicAndPartition("topic_1", 1) -> 4L,
      TopicAndPartition("topic_2", 2) -> 5L,
      TopicAndPartition("topic_3", 3) -> 6L
    )
    val kafkaState = Option((heads, offsets))
    when(kafkaConnector.getKafkaState(topic)).thenReturn(kafkaState)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    // topic       partition  heads  offsets  lag
    // topic_1     1          1      4        1-4= -3
    // topic_2     2          2      5        2-5= -3
    // topic_3     3          3      6        3-6= -3
    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0) // lag.max(0) = 0
  }

  test("check lag for empty heads and offsets") {
    val kafkaState = Option((Map[TopicAndPartition, Long](), Map[TopicAndPartition, Long]()))
    when(kafkaConnector.getKafkaState(topic)).thenReturn(kafkaState)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0)
  }

  test("return none if can't retrieve kafka state") {
    when(kafkaConnector.getKafkaState(topic)).thenReturn(None)

    val result = new KafkaLagProvider(kafkaConnector, topic).provide()

    assert(result.getValue.isEmpty)
  }
} 
Example 11
Source File: KafkaOutMessagesProviderTest.scala    From bandar-log   with Apache License 2.0
package com.aol.one.dwh.bandarlog.providers

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import com.aol.one.dwh.infra.config.Topic
import kafka.common.TopicAndPartition
import org.mockito.Mockito.when
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar


class KafkaOutMessagesProviderTest extends FunSuite with MockitoSugar {

  private val kafkaConnector = mock[KafkaConnector]
  private val topic = Topic("topic_id", Set("topic_1", "topic_2"), "group_id")

  test("check count of out messages/offsets over all topic partitions") {
    val offsets = Option(Map(
      TopicAndPartition("topic_1", 1) -> 1L,
      TopicAndPartition("topic_2", 2) -> 2L,
      TopicAndPartition("topic_3", 3) -> 3L
    ))
    when(kafkaConnector.getOffsets(topic)).thenReturn(offsets)

    val result = new KafkaOutMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 6) // 1 + 2 + 3
  }

  test("check count of out messages/offsets for empty offsets result") {
    when(kafkaConnector.getOffsets(topic)).thenReturn(Some(Map[TopicAndPartition, Long]()))

    val result = new KafkaOutMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.nonEmpty)
    assert(result.getValue.get == 0)
  }

  test("return none if can't retrieve offsets") {
    when(kafkaConnector.getOffsets(topic)).thenReturn(None)

    val result = new KafkaOutMessagesProvider(kafkaConnector, topic).provide()

    assert(result.getValue.isEmpty)
  }
} 
Example 12
Source File: KafkaConnector.scala    From bandar-log   with Apache License 2.0
package com.aol.one.dwh.bandarlog.connectors

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector._
import com.aol.one.dwh.infra.config.Topic
import com.aol.one.dwh.infra.kafka.KafkaCluster
import com.aol.one.dwh.infra.util.LogTrait
import kafka.common.TopicAndPartition

object KafkaConnector {
  type Offset = Long
  type KafkaPartitions = Set[TopicAndPartition]
  type KafkaHeads = Map[TopicAndPartition, Offset]
  type KafkaOffsets = Map[TopicAndPartition, Offset]
  type KafkaState = (KafkaHeads, KafkaOffsets)
}


class KafkaConnector(kafkaCluster: KafkaCluster) extends LogTrait {

  def getKafkaState(topic: Topic): Option[KafkaState] =
    for {
      heads      <- getHeads(topic)
      offsets    <- getOffsets(topic)
    } yield (heads, offsets)

  def getHeads(topic: Topic): Option[KafkaHeads] = {
    kafkaCluster.getLatestOffsets(topic.groupId, topic.values) match {
      case Left(l) =>
        logger.error(s"Cannot obtain leaders offsets for topic:[${topic.values}], cause {}", l.toString)
        None
      case Right(r) => Some(r.map { case (key, value) => key -> value })
    }
  }

  def getOffsets(topic: Topic): Option[KafkaOffsets] = {
    kafkaCluster.getConsumerOffsets(topic.groupId, topic.values ) match {
      case Left(l) =>
        logger.error(s"Cannot obtain consumers offsets for topic:[${topic.values}], cause {}", l.toString)
        None
      case Right(r) => Some(r)
    }
  }
} 
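
A short usage sketch of the connector above; the KafkaCluster instance is assumed to be built elsewhere, and the Topic value mirrors the ones used in the bandar-log tests.

import com.aol.one.dwh.bandarlog.connectors.KafkaConnector
import com.aol.one.dwh.infra.config.Topic
import com.aol.one.dwh.infra.kafka.KafkaCluster

// Usage sketch, assuming `kafkaCluster: KafkaCluster` is available.
def printState(kafkaCluster: KafkaCluster): Unit = {
  val connector = new KafkaConnector(kafkaCluster)
  val topic = Topic("topic_id", Set("topic_1", "topic_2"), "group_id")

  connector.getKafkaState(topic) match {
    case Some((heads, offsets)) => println(s"heads=$heads, consumer offsets=$offsets")
    case None                   => println("Kafka state unavailable (see logged error)")
  }
}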
Example 13
Source File: KafkaClusterSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {
    val group = "kcsuitegroup" + Random.nextInt(10000)

    val offset = Random.nextInt(10000)

    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
} 
Example 14
Source File: KafkaSourceOffset.scala    From spark-kafka-0-8-sql   with Apache License 2.0
package com.hortonworks.spark.sql.kafka08

import kafka.common.TopicAndPartition
import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset

// NOTE: the KafkaSourceOffset case class (which carries the partitionToOffsets map referenced
// below) is elided in this excerpt; only its companion object is shown.
object KafkaSourceOffset {

  def getPartitionOffsets(offset: Offset): Map[TopicAndPartition, LeaderOffset] = {
    offset match {
      case o: KafkaSourceOffset => o.partitionToOffsets
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to KafkaSourceOffset")
    }
  }
} 
Example 15
Source File: EtlProcessor.scala    From etl-light   with MIT License
package yamrcraft.etlite.processors

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka._
import org.slf4j.LoggerFactory
import yamrcraft.etlite.Settings
import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager}
import yamrcraft.etlite.transformers.InboundMessage

object EtlProcessor {

  val logger = LoggerFactory.getLogger(this.getClass)

  def run(settings: Settings) = {
    val context = createContext(settings)

    val stateManager = new KafkaStateManager(settings.etl.state)

    val lastState = stateManager.readState
    logger.info(s"last persisted state: $lastState")

    val currState = stateManager.fetchNextState(lastState, settings)
    logger.info(s"batch working state: $currState")

    val rdd = createRDD(context, currState, settings)
    processRDD(rdd, currState.jobId, settings)

    logger.info("committing state")
    stateManager.commitState(currState)
  }

  private def createContext(settings: Settings) = {
    val sparkConf = new SparkConf()
      .setAppName(settings.spark.appName)
      .setAll(settings.spark.conf)

    new SparkContext(sparkConf)
  }

  private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = {
    KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage](
      context,
      settings.kafka.properties,
      state.ranges.toArray,
      Map[TopicAndPartition, Broker](),
      (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => { InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message()) }
    )
  }

  private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = {
    // passed to remote workers
    val etlSettings = settings.etl

    logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]")

    val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD)

    rdd.foreachPartition { partition =>
        // executed at the worker
        new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings)
          .processPartition(partition)
      }

    logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]")
  }


} 
Example 16
Source File: KafkaClusterSuite.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.streaming.kafka

import scala.util.Random

import kafka.common.TopicAndPartition
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.SparkFunSuite

class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll {
  private val topic = "kcsuitetopic" + Random.nextInt(10000)
  private val topicAndPartition = TopicAndPartition(topic, 0)
  private var kc: KafkaCluster = null

  private var kafkaTestUtils: KafkaTestUtils = _

  override def beforeAll() {
    kafkaTestUtils = new KafkaTestUtils
    kafkaTestUtils.setup()

    kafkaTestUtils.createTopic(topic)
    kafkaTestUtils.sendMessages(topic, Map("a" -> 1))
    kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress))
  }

  override def afterAll() {
    if (kafkaTestUtils != null) {
      kafkaTestUtils.teardown()
      kafkaTestUtils = null
    }
  }

  test("metadata apis") {
    val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition)
    val leaderAddress = s"${leader._1}:${leader._2}"
    assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader")

    val parts = kc.getPartitions(Set(topic)).right.get
    assert(parts(topicAndPartition), "didn't get partitions")

    val err = kc.getPartitions(Set(topic + "BAD"))
    assert(err.isLeft, "getPartitions for a nonexistant topic should be an error")
  }

  test("leader offset apis") {
    val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(earliest(topicAndPartition).offset === 0, "didn't get earliest")

    val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get
    assert(latest(topicAndPartition).offset === 1, "didn't get latest")
  }

  test("consumer offset apis") {
    val group = "kcsuitegroup" + Random.nextInt(10000)

    val offset = Random.nextInt(10000)

    val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset))
    assert(set.isRight, "didn't set consumer offsets")

    val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get
    assert(get(topicAndPartition) === offset, "didn't get consumer offsets")
  }
} 
Example 17
Source File: DefaultPartitionGrouperSpec.scala    From incubator-retired-gearpump   with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.grouper

import kafka.common.TopicAndPartition
import org.scalacheck.Gen
import org.scalatest.prop.PropertyChecks
import org.scalatest.{Matchers, PropSpec}

class DefaultPartitionGrouperSpec extends PropSpec with PropertyChecks with Matchers {
  property("KafkaDefaultGrouper should group TopicAndPartitions in a round-robin way") {
    forAll(Gen.posNum[Int], Gen.posNum[Int], Gen.posNum[Int]) {
      (topicNum: Int, partitionNum: Int, taskNum: Int) => {
        val topicAndPartitions = for {
          t <- 0.until(topicNum)
          p <- 0.until(partitionNum)
        } yield TopicAndPartition("topic" + t, p)
        0.until(taskNum).foreach { taskIndex =>
          val grouper = new DefaultPartitionGrouper
          grouper.group(taskNum, taskIndex, topicAndPartitions.toArray).forall(
            tp => topicAndPartitions.indexOf(tp) % taskNum == taskIndex)
        }
      }
    }
  }
} 
Example 18
Source File: KafkaConsumerSpec.scala    From incubator-retired-gearpump   with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.consumer

import com.twitter.bijection.Injection
import kafka.api.OffsetRequest
import kafka.common.TopicAndPartition
import kafka.consumer.SimpleConsumer
import kafka.message.{Message, MessageAndOffset}
import org.mockito.Mockito._
import org.scalacheck.Gen
import org.scalatest.mock.MockitoSugar
import org.scalatest.prop.PropertyChecks
import org.scalatest.{Matchers, PropSpec}

class KafkaConsumerSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar {
  val messageGen = Gen.alphaStr map (msg => new Message(Injection[String, Array[Byte]](msg)))
  val messageNumGen = Gen.choose[Int](0, 1000)
  val topicAndPartitionGen = for {
    topic <- Gen.alphaStr
    partition <- Gen.choose[Int](0, Int.MaxValue)
  } yield (topic, partition)

  property("KafkaConsumer should iterate MessageAndOffset calling hasNext and next") {
    forAll(messageGen, messageNumGen, topicAndPartitionGen) {
      (message: Message, num: Int, topicAndPartition: (String, Int)) =>
        val (topic, partition) = topicAndPartition
        val consumer = mock[SimpleConsumer]
        when(consumer.earliestOrLatestOffset(TopicAndPartition(topic, partition),
          OffsetRequest.EarliestTime, -1)).thenReturn(0)
        val iterator = 0.until(num).map(index => MessageAndOffset(message, index.toLong)).iterator
        val getIterator = (offset: Long) => iterator
        val kafkaConsumer = new KafkaConsumer(consumer, topic, partition, getIterator)
        0.until(num).foreach { i =>
          kafkaConsumer.hasNext shouldBe true
          val kafkaMessage = kafkaConsumer.next
          kafkaMessage.offset shouldBe i.toLong
          kafkaMessage.key shouldBe None
        }
        kafkaConsumer.hasNext shouldBe false
    }
  }

  val startOffsetGen = Gen.choose[Long](1L, 1000L)
  property("KafkaConsumer setStartOffset should reset internal iterator") {
    forAll(topicAndPartitionGen, startOffsetGen) {
      (topicAndPartition: (String, Int), startOffset: Long) =>
        val (topic, partition) = topicAndPartition
        val consumer = mock[SimpleConsumer]
        val getIterator = mock[Long => Iterator[MessageAndOffset]]
        when(consumer.earliestOrLatestOffset(TopicAndPartition(topic, partition),
          OffsetRequest.EarliestTime, -1)).thenReturn(0)
        val kafkaConsumer = new KafkaConsumer(consumer, topic, partition, getIterator)
        kafkaConsumer.setStartOffset(startOffset)
        verify(getIterator).apply(startOffset)
    }
  }

  property("KafkaConsumer close should close SimpleConsumer") {
    forAll(topicAndPartitionGen) {
      (topicAndPartition: (String, Int)) =>
        val (topic, partition) = topicAndPartition
        val consumer = mock[SimpleConsumer]
        when(consumer.earliestOrLatestOffset(TopicAndPartition(topic, partition),
          OffsetRequest.EarliestTime, -1)).thenReturn(0)
        val getIterator = mock[Long => Iterator[MessageAndOffset]]
        val kafkaConsumer = new KafkaConsumer(consumer, topic, partition, getIterator)
        kafkaConsumer.close()
        verify(consumer).close()
    }
  }
} 
Example 19
Source File: KafkaConsumer.scala    From incubator-retired-gearpump   with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.consumer

import kafka.api.{FetchRequestBuilder, OffsetRequest}
import kafka.common.ErrorMapping._
import kafka.common.TopicAndPartition
import kafka.consumer.SimpleConsumer
import kafka.message.MessageAndOffset
import kafka.utils.Utils

object KafkaConsumer {
  def apply(topic: String, partition: Int, startOffsetTime: Long,
      fetchSize: Int, consumer: SimpleConsumer): KafkaConsumer = {
    val getIterator = (offset: Long) => {
      val request = new FetchRequestBuilder()
        .addFetch(topic, partition, offset, fetchSize)
        .build()

      val response = consumer.fetch(request)
      response.errorCode(topic, partition) match {
        case NoError => response.messageSet(topic, partition).iterator
        case error => throw exceptionFor(error)
      }
    }
    new KafkaConsumer(consumer, topic, partition, getIterator, startOffsetTime)
  }
}


class KafkaConsumer(consumer: SimpleConsumer,
    topic: String,
    partition: Int,
    getIterator: (Long) => Iterator[MessageAndOffset],
    startOffsetTime: Long = OffsetRequest.EarliestTime) {
  private val earliestOffset = consumer
    .earliestOrLatestOffset(TopicAndPartition(topic, partition), startOffsetTime, -1)
  private var nextOffset: Long = earliestOffset
  private var iterator: Iterator[MessageAndOffset] = getIterator(nextOffset)

  def setStartOffset(startOffset: Long): Unit = {
    nextOffset = startOffset
    iterator = getIterator(nextOffset)
  }

  def next(): KafkaMessage = {
    val mo = iterator.next()
    val message = mo.message

    nextOffset = mo.nextOffset

    val offset = mo.offset
    val payload = Utils.readBytes(message.payload)
    new KafkaMessage(topic, partition, offset, Option(message.key).map(Utils.readBytes), payload)
  }

  def hasNext: Boolean = {
    @annotation.tailrec
    def hasNextHelper(iter: Iterator[MessageAndOffset], newIterator: Boolean): Boolean = {
      if (iter.hasNext) true
      else if (newIterator) false
      else {
        iterator = getIterator(nextOffset)
        hasNextHelper(iterator, newIterator = true)
      }
    }
    hasNextHelper(iterator, newIterator = false)
  }

  def getNextOffset: Long = nextOffset

  def close(): Unit = {
    consumer.close()
  }
} 
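
A hedged usage sketch of the factory method above; the broker host, port, and buffer sizes are placeholders. In gearpump this wiring is normally done by KafkaClient.createConsumer (see Example 1).

import kafka.api.OffsetRequest
import kafka.consumer.SimpleConsumer
import org.apache.gearpump.streaming.kafka.lib.source.consumer.KafkaConsumer

// Usage sketch: build a SimpleConsumer directly instead of going through KafkaClient.
val simpleConsumer = new SimpleConsumer("localhost", 9092, 30000, 65536, "example-client")
val kafkaConsumer = KafkaConsumer("my_topic", 0, OffsetRequest.EarliestTime, 1024 * 1024, simpleConsumer)
try {
  while (kafkaConsumer.hasNext) {
    val msg = kafkaConsumer.next()
    println(s"offset=${msg.offset}, key=${msg.key}")
  }
} finally {
  kafkaConsumer.close()
}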
Example 20
Source File: FetchThread.scala    From incubator-retired-gearpump   with Apache License 2.0
package org.apache.gearpump.streaming.kafka.lib.source.consumer

import java.nio.channels.ClosedByInterruptException
import java.util.concurrent.LinkedBlockingQueue

import kafka.common.TopicAndPartition
import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.slf4j.Logger

import org.apache.gearpump.util.LogUtil

object FetchThread {
  private val LOG: Logger = LogUtil.getLogger(classOf[FetchThread])

  val factory = new FetchThreadFactory

  class FetchThreadFactory extends java.io.Serializable {
    def getFetchThread(config: KafkaConfig, client: KafkaClient): FetchThread = {
      val fetchThreshold = config.getInt(KafkaConfig.FETCH_THRESHOLD_CONFIG)
      val fetchSleepMS = config.getLong(KafkaConfig.FETCH_SLEEP_MS_CONFIG)
      val startOffsetTime = config.getLong(KafkaConfig.CONSUMER_START_OFFSET_CONFIG)
      FetchThread(fetchThreshold, fetchSleepMS, startOffsetTime, client)
    }
  }

  def apply(fetchThreshold: Int,
      fetchSleepMS: Long,
      startOffsetTime: Long,
      client: KafkaClient): FetchThread = {
    val createConsumer = (tp: TopicAndPartition) =>
      client.createConsumer(tp.topic, tp.partition, startOffsetTime)
    val incomingQueue = new LinkedBlockingQueue[KafkaMessage]()
    val sleeper = new ExponentialBackoffSleeper(
      backOffMultiplier = 2.0,
      initialDurationMs = 100L,
      maximumDurationMs = 10000L)
    new FetchThread(createConsumer, incomingQueue, sleeper, fetchThreshold, fetchSleepMS)
  }
}

  // NOTE: the FetchThread class declaration and its fields (createConsumer, incomingQueue,
  // consumers, topicAndPartitions, fetchThreshold, ...) are elided in this excerpt.
  private def fetchMessage: Boolean = {
    if (incomingQueue.size >= fetchThreshold) {
      false
    } else {
      consumers.foldLeft(false) { (hasNext, tpAndConsumer) =>
        val (_, consumer) = tpAndConsumer
        if (consumer.hasNext) {
          incomingQueue.put(consumer.next())
          true
        } else {
          hasNext
        }
      }
    }
  }

  private def createAllConsumers: Map[TopicAndPartition, KafkaConsumer] = {
    topicAndPartitions.map(tp => tp -> createConsumer(tp)).toMap
  }

  private def resetConsumers(nextOffsets: Map[TopicAndPartition, Long]): Unit = {
    consumers.values.foreach(_.close())
    consumers = createAllConsumers
    consumers.foreach { case (tp, consumer) =>
      consumer.setStartOffset(nextOffsets(tp))
    }
  }
} 
Example 21
Source File: CheckpointedDirectKafkaInputDStream.scala    From streamliner-examples   with Apache License 2.0
package org.apache.spark.streaming.kafka

import kafka.common.TopicAndPartition

// NOTE: the CheckpointedDirectKafkaInputDStream class declaration and the start of the
// enclosing method are elided in this excerpt; only the tail of its offset-tracking logic
// and two accessors are shown.
    prevOffsets = currentOffsets
    currentOffsets = untilOffsets.map(kv => kv._1 -> kv._2.offset)

    prevOffsets == currentOffsets match {
      case false => Some(rdd)
      case true => None
    }
  }

  def getCurrentOffsets(): Map[TopicAndPartition, Long] = currentOffsets
  def setCurrentOffsets(offsets: Map[TopicAndPartition, Long]): Unit = {
    currentOffsets = offsets
  }
} 
Example 22
Source File: StreamHelper.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.spark.streaming.kafka

import kafka.KafkaHelper
import kafka.common.TopicAndPartition
import kafka.consumer.PartitionTopicInfo
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.{Logging, SparkException}
import scala.reflect.ClassTag

case class StreamHelper(kafkaParams: Map[String, String]) extends Logging {
  // helper for kafka zookeeper
  lazy val kafkaHelper = KafkaHelper(kafkaParams)
  lazy val kc = new KafkaCluster(kafkaParams)

  // 1. get leader's earliest and latest offset
  // 2. get consumer offset
  // 3-1. if (2) is bounded in (1) use (2) for stream
  // 3-2. else use (1) by "auto.offset.reset"
  private def getStartOffsets(topics: Set[String]): Map[TopicAndPartition, Long] = {
    lazy val reset = kafkaParams.get("auto.offset.reset").map(_.toLowerCase)
    lazy val consumerOffsets = kafkaHelper.getConsumerOffsets(topics.toSeq)

    {
      for {
        topicPartitions <- kc.getPartitions(topics).right
        smallOffsets <- kc.getEarliestLeaderOffsets(topicPartitions).right
        largeOffsets <- kc.getLatestLeaderOffsets(topicPartitions).right
      } yield {
        {
          for {
            tp <- topicPartitions
          } yield {
            val co = consumerOffsets.getOrElse(tp, PartitionTopicInfo.InvalidOffset)
            val so = smallOffsets.get(tp).map(_.offset).get
            val lo = largeOffsets.get(tp).map(_.offset).get

            logWarning(s"$tp: $co $so $lo")

            if (co >= so && co <= lo) {
              (tp, co)
            } else {
              (tp, reset match {
                case Some("smallest") => so
                case _ => lo
              })
            }
          }
        }.toMap
      }
    }.fold(errs => throw new SparkException(errs.mkString("\n")), ok => ok)
  }

  def createStream[K: ClassTag, V: ClassTag, KD <: Decoder[K]: ClassTag, VD <: Decoder[V]: ClassTag](ssc: StreamingContext, topics: Set[String]): InputDStream[(K, V)] = {
    type R = (K, V)
    val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key(), mmd.message())

    kafkaHelper.registerConsumerInZK(topics)

    new DirectKafkaInputDStream[K, V, KD, VD, R](ssc, kafkaParams, getStartOffsets(topics), messageHandler)
  }

  def commitConsumerOffsets(offsets: HasOffsetRanges): Unit = {
    val offsetsMap = {
      for {
        range <- offsets.offsetRanges if range.fromOffset < range.untilOffset
      } yield {
        logDebug(range.toString())
        TopicAndPartition(range.topic, range.partition) -> range.untilOffset
      }
    }.toMap

    kafkaHelper.commitConsumerOffsets(offsetsMap)
  }

  def commitConsumerOffset(range: OffsetRange): Unit = {
    if (range.fromOffset < range.untilOffset) {
      try {
        val tp = TopicAndPartition(range.topic, range.partition)
        logDebug("Committed offset " + range.untilOffset + " for topic " + tp)
        kafkaHelper.commitConsumerOffset(tp, range.untilOffset)
      } catch {
        case t: Throwable =>
          // log it and let it go
          logWarning("exception during commitOffsets",  t)
          throw t
      }
    }
  }

  def commitConsumerOffsets[R](stream: InputDStream[R]): Unit = {
    stream.foreachRDD { rdd =>
      commitConsumerOffsets(rdd.asInstanceOf[HasOffsetRanges])
    }
  }
}
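
The offset-selection rule inside getStartOffsets above boils down to a simple bounds check; a hypothetical standalone version for illustration:

// Illustration only: pick the starting offset for one partition, given the consumer's committed
// offset (co), the leader's earliest (so) and latest (lo) offsets, and auto.offset.reset.
def chooseStartOffset(co: Long, so: Long, lo: Long, reset: Option[String]): Long =
  if (co >= so && co <= lo) co   // committed offset still lies in the retained range -> resume from it
  else reset match {
    case Some("smallest") => so  // fall back to the earliest available offset
    case _                => lo  // default: fall back to the latest offset
  }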