kafka.message.MessageAndMetadata Scala Examples

The following examples show how to use kafka.message.MessageAndMetadata, the record wrapper from Kafka's old Scala client API that pairs a decoded key and message with the topic, partition, and offset they were read from. The original project, source file, and license for each example are noted above its code.
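For orientation, here is a minimal sketch of the accessors the examples below rely on; the ConsumedRecord type and the handler name are illustrative, not part of the Kafka API.

import kafka.message.MessageAndMetadata

// Illustrative container for what a handler typically keeps from each record.
case class ConsumedRecord(topic: String, partition: Int, offset: Long,
                          key: String, value: String)

// The usual shape of a message handler: combine the decoded payload with the
// metadata (topic, partition, offset) that MessageAndMetadata exposes.
val handler: MessageAndMetadata[String, String] => ConsumedRecord =
  mmd => ConsumedRecord(mmd.topic, mmd.partition, mmd.offset, mmd.key(), mmd.message())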
Example 1
Source File: StreamHelper.scala    From incubator-s2graph    with Apache License 2.0
package org.apache.spark.streaming.kafka

import kafka.KafkaHelper
import kafka.common.TopicAndPartition
import kafka.consumer.PartitionTopicInfo
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.{Logging, SparkException}
import scala.reflect.ClassTag

case class StreamHelper(kafkaParams: Map[String, String]) extends Logging {
  // helper for kafka zookeeper
  lazy val kafkaHelper = KafkaHelper(kafkaParams)
  lazy val kc = new KafkaCluster(kafkaParams)

  // 1. get leader's earliest and latest offset
  // 2. get consumer offset
  // 3-1. if (2) is bounded in (1) use (2) for stream
  // 3-2. else use (1) by "auto.offset.reset"
  private def getStartOffsets(topics: Set[String]): Map[TopicAndPartition, Long] = {
    lazy val reset = kafkaParams.get("auto.offset.reset").map(_.toLowerCase)
    lazy val consumerOffsets = kafkaHelper.getConsumerOffsets(topics.toSeq)

    {
      for {
        topicPartitions <- kc.getPartitions(topics).right
        smallOffsets <- kc.getEarliestLeaderOffsets(topicPartitions).right
        largeOffsets <- kc.getLatestLeaderOffsets(topicPartitions).right
      } yield {
        {
          for {
            tp <- topicPartitions
          } yield {
            val co = consumerOffsets.getOrElse(tp, PartitionTopicInfo.InvalidOffset)
            val so = smallOffsets.get(tp).map(_.offset).get
            val lo = largeOffsets.get(tp).map(_.offset).get

            logWarning(s"$tp: $co $so $lo")

            if (co >= so && co <= lo) {
              (tp, co)
            } else {
              (tp, reset match {
                case Some("smallest") => so
                case _ => lo
              })
            }
          }
        }.toMap
      }
    }.fold(errs => throw new SparkException(errs.mkString("\n")), ok => ok)
  }

  def createStream[K: ClassTag, V: ClassTag, KD <: Decoder[K]: ClassTag, VD <: Decoder[V]: ClassTag](ssc: StreamingContext, topics: Set[String]): InputDStream[(K, V)] = {
    type R = (K, V)
    val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key(), mmd.message())

    kafkaHelper.registerConsumerInZK(topics)

    new DirectKafkaInputDStream[K, V, KD, VD, R](ssc, kafkaParams, getStartOffsets(topics), messageHandler)
  }

  def commitConsumerOffsets(offsets: HasOffsetRanges): Unit = {
    val offsetsMap = {
      for {
        range <- offsets.offsetRanges if range.fromOffset < range.untilOffset
      } yield {
        logDebug(range.toString())
        TopicAndPartition(range.topic, range.partition) -> range.untilOffset
      }
    }.toMap

    kafkaHelper.commitConsumerOffsets(offsetsMap)
  }

  def commitConsumerOffset(range: OffsetRange): Unit = {
    if (range.fromOffset < range.untilOffset) {
      try {
        val tp = TopicAndPartition(range.topic, range.partition)
        logDebug("Committed offset " + range.untilOffset + " for topic " + tp)
        kafkaHelper.commitConsumerOffset(tp, range.untilOffset)
      } catch {
        case t: Throwable =>
          // log it, then rethrow so the caller can handle it
          logWarning("exception during commitOffsets", t)
          throw t
      }
    }
  }

  def commitConsumerOffsets[R](stream: InputDStream[R]): Unit = {
    stream.foreachRDD { rdd =>
      commitConsumerOffsets(rdd.asInstanceOf[HasOffsetRanges])
    }
  }
} 
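As a rough usage sketch of Example 1 (the broker, ZooKeeper, group, and topic settings below are placeholders, and the processing is just a println), the helper builds the direct stream starting from the committed consumer offsets and then registers an offset commit for each batch:

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.StreamHelper
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamHelperDemo {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(new SparkConf().setAppName("stream-helper-demo"), Seconds(10))

    // Placeholder connection settings; adjust for your cluster.
    val helper = StreamHelper(Map(
      "metadata.broker.list" -> "localhost:9092",
      "zookeeper.connect" -> "localhost:2181",
      "group.id" -> "demo-group",
      "auto.offset.reset" -> "smallest"))

    val stream = helper.createStream[String, String, StringDecoder, StringDecoder](ssc, Set("demo-topic"))
    stream.foreachRDD(rdd => rdd.foreach(println))   // stand-in for real processing
    helper.commitConsumerOffsets(stream)             // commit consumer offsets once per batch

    ssc.start()
    ssc.awaitTermination()
  }
}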
Example 2
Source File: CheckpointedDirectKafkaInputDStream.scala    From streamliner-examples    with Apache License 2.0
package org.apache.spark.streaming.kafka

import kafka.common.TopicAndPartition

// Only the tail of the class is shown in this excerpt: the class declaration and
// the start of its compute() override are omitted. The visible part records the
// offsets covered by the current batch so they can be read back or overridden.
    prevOffsets = currentOffsets
    currentOffsets = untilOffsets.map(kv => kv._1 -> kv._2.offset)

    // Emit the batch RDD only when this batch actually advances past the previous offsets.
    if (prevOffsets == currentOffsets) None else Some(rdd)
  }

  def getCurrentOffsets(): Map[TopicAndPartition, Long] = currentOffsets
  def setCurrentOffsets(offsets: Map[TopicAndPartition, Long]): Unit = {
    currentOffsets = offsets
  }
} 
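The two methods at the end are what make this stream externally checkpointable: the driver can persist getCurrentOffsets after each batch and feed the saved values back through setCurrentOffsets on restart. A rough sketch of that pattern, assuming stream is an already-constructed CheckpointedDirectKafkaInputDStream and loadOffsets/saveOffsets stand in for whatever durable store you use:

import kafka.common.TopicAndPartition

// Placeholders for an external offset store (ZooKeeper, a database, HDFS, ...).
def loadOffsets(): Option[Map[TopicAndPartition, Long]] = ???
def saveOffsets(offsets: Map[TopicAndPartition, Long]): Unit = ???

// On restart, resume from the offsets persisted by the previous run, if any.
loadOffsets().foreach(stream.setCurrentOffsets)

// After each batch is generated, persist the offsets the stream has reached so far.
stream.foreachRDD { _ => saveOffsets(stream.getCurrentOffsets()) }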
Example 3
Source File: EtlProcessor.scala    From etl-light    with MIT License
package yamrcraft.etlite.processors

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka._
import org.slf4j.LoggerFactory
import yamrcraft.etlite.Settings
import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager}
import yamrcraft.etlite.transformers.InboundMessage

object EtlProcessor {

  val logger = LoggerFactory.getLogger(this.getClass)

  def run(settings: Settings) = {
    val context = createContext(settings)

    val stateManager = new KafkaStateManager(settings.etl.state)

    val lastState = stateManager.readState
    logger.info(s"last persisted state: $lastState")

    val currState = stateManager.fetchNextState(lastState, settings)
    logger.info(s"batch working state: $currState")

    val rdd = createRDD(context, currState, settings)
    processRDD(rdd, currState.jobId, settings)

    logger.info("committing state")
    stateManager.commitState(currState)
  }

  private def createContext(settings: Settings) = {
    val sparkConf = new SparkConf()
      .setAppName(settings.spark.appName)
      .setAll(settings.spark.conf)

    new SparkContext(sparkConf)
  }

  private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = {
    KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage](
      context,
      settings.kafka.properties,
      state.ranges.toArray,
      Map[TopicAndPartition, Broker](),
      (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => { InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message()) }
    )
  }

  private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = {
    // passed to remote workers
    val etlSettings = settings.etl

    logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]")

    val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD)

    rdd.foreachPartition { partition =>
        // executed at the worker
        new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings)
          .processPartition(partition)
      }

    logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]")
  }
}
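The createRDD call above is the KafkaUtils batch API with an explicit message handler. For reference, here is a stripped-down sketch of the same call outside the EtlProcessor wiring; the broker address, topic, and offset range are placeholders, and the handler keeps each record's offset next to its payload:

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka.{Broker, KafkaUtils, OffsetRange}

def readBatch(sc: SparkContext): RDD[(Long, Array[Byte])] = {
  val kafkaParams = Map("metadata.broker.list" -> "localhost:9092")   // placeholder broker
  val ranges = Array(OffsetRange("demo-topic", 0, 0L, 100L))          // topic, partition, from, until

  KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, (Long, Array[Byte])](
    sc, kafkaParams, ranges, Map[TopicAndPartition, Broker](),
    // keep the offset alongside the value, in the spirit of InboundMessage above
    (mmd: MessageAndMetadata[Array[Byte], Array[Byte]]) => (mmd.offset, mmd.message()))
}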
Example 4
Source File: KafkaJsonConsumerSpec.scala    From coral    with Apache License 2.0
package io.coral.lib

import java.util.Properties

import kafka.consumer._
import kafka.message.MessageAndMetadata
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._
import org.mockito.Mockito._
import org.scalatest.mock.MockitoSugar
import org.scalatest.{Matchers, WordSpec}

class KafkaJsonConsumerSpec extends WordSpec with Matchers with MockitoSugar {
	"KafkaJsonConsumer" should {
		"provide a stream" in {
			val consumer = KafkaJsonConsumer()
			intercept[IllegalArgumentException] {
				consumer.stream("abc", new Properties())
			}
		}
	}

	"KafkaJsonStream" should {
		val fakeConnection = mock[ConsumerConnector]
		doNothing.when(fakeConnection).commitOffsets

		val fakeMessage = mock[MessageAndMetadata[Array[Byte], JValue]]
		when(fakeMessage.key()).thenReturn("TestKey".getBytes)
		when(fakeMessage.message()).thenReturn(parse( """{ "json": "test" }"""))

		val fakeIterator = mock[ConsumerIterator[Array[Byte], JValue]]
		when(fakeIterator.hasNext()).thenReturn(true).thenReturn(false)
		when(fakeIterator.next()).thenReturn(fakeMessage)

		val fakeStream = mock[KafkaStream[Array[Byte], JValue]]
		when(fakeStream.iterator()).thenReturn(fakeIterator)

		"provide a next value" in {
			val kjs = new KafkaJsonStream(fakeConnection, fakeStream)
			kjs.hasNextInTime shouldBe true
			kjs.next shouldBe parse( """{ "json": "test" }""")
		}
	}

	"JsonDecoder" should {
		"convert bytes to Json object" in {
			val jsonString = """{ "hello": "json" }"""
			val bytes = jsonString.getBytes
			val jsonValue = parse(jsonString)
			JsonDecoder.fromBytes(bytes) shouldBe jsonValue
		}

		"return JNothing for invalid JSon" in {
			val jsonString = """hello"""
			val bytes = jsonString.getBytes
			JsonDecoder.fromBytes(bytes) shouldBe JNothing
		}
	}
}
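The JsonDecoder exercised in the last block belongs to the coral project and is not shown here; a minimal decoder with the same fromBytes contract could look like the following (an illustrative stand-in, not the project's implementation):

import kafka.serializer.Decoder
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._

// Illustrative stand-in: decode bytes as UTF-8 JSON, returning JNothing when
// the payload cannot be parsed.
object SimpleJsonDecoder extends Decoder[JValue] {
  override def fromBytes(bytes: Array[Byte]): JValue =
    try parse(new String(bytes, "UTF-8")) catch { case _: Exception => JNothing }
}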