kafka.serializer.Decoder Scala Examples

The following examples show how to use kafka.serializer.Decoder. They are taken from open-source projects; follow the link above each example to reach the original project or source file.
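All of the snippets below target the old Scala consumer API, where kafka.serializer.Decoder[T] is the deserialization hook: it declares a single fromBytes(bytes: Array[Byte]): T method, and decoder classes conventionally take a kafka.utils.VerifiableProperties constructor argument so Kafka can instantiate them reflectively. As a minimal, illustrative sketch (not taken from any of the projects below), a UTF-8 string decoder could look like this:

import java.nio.charset.StandardCharsets

import kafka.serializer.Decoder
import kafka.utils.VerifiableProperties

// Minimal illustrative Decoder[String]; the VerifiableProperties argument
// mirrors the constructor shape Kafka expects when instantiating decoders
// reflectively.
class Utf8StringDecoder(props: VerifiableProperties = null) extends Decoder[String] {
  override def fromBytes(bytes: Array[Byte]): String =
    new String(bytes, StandardCharsets.UTF_8)
}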
Example 1
Source File: StreamHelper.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.spark.streaming.kafka

import kafka.KafkaHelper
import kafka.common.TopicAndPartition
import kafka.consumer.PartitionTopicInfo
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.{Logging, SparkException}
import scala.reflect.ClassTag

case class StreamHelper(kafkaParams: Map[String, String]) extends Logging {
  // helper for kafka zookeeper
  lazy val kafkaHelper = KafkaHelper(kafkaParams)
  lazy val kc = new KafkaCluster(kafkaParams)

  // 1. get leader's earliest and latest offset
  // 2. get consumer offset
  // 3-1. if (2) is bounded in (1) use (2) for stream
  // 3-2. else use (1) by "auto.offset.reset"
  private def getStartOffsets(topics: Set[String]): Map[TopicAndPartition, Long] = {
    lazy val reset = kafkaParams.get("auto.offset.reset").map(_.toLowerCase)
    lazy val consumerOffsets = kafkaHelper.getConsumerOffsets(topics.toSeq)

    {
      for {
        topicPartitions <- kc.getPartitions(topics).right
        smallOffsets <- kc.getEarliestLeaderOffsets(topicPartitions).right
        largeOffsets <- kc.getLatestLeaderOffsets(topicPartitions).right
      } yield {
        {
          for {
            tp <- topicPartitions
          } yield {
            val co = consumerOffsets.getOrElse(tp, PartitionTopicInfo.InvalidOffset)
            val so = smallOffsets.get(tp).map(_.offset).get
            val lo = largeOffsets.get(tp).map(_.offset).get

            logWarning(s"$tp: $co $so $lo")

            if (co >= so && co <= lo) {
              (tp, co)
            } else {
              (tp, reset match {
                case Some("smallest") => so
                case _ => lo
              })
            }
          }
        }.toMap
      }
    }.fold(errs => throw new SparkException(errs.mkString("\n")), ok => ok)
  }

  def createStream[K: ClassTag, V: ClassTag, KD <: Decoder[K]: ClassTag, VD <: Decoder[V]: ClassTag](ssc: StreamingContext, topics: Set[String]): InputDStream[(K, V)] = {
    type R = (K, V)
    val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key(), mmd.message())

    kafkaHelper.registerConsumerInZK(topics)

    new DirectKafkaInputDStream[K, V, KD, VD, R](ssc, kafkaParams, getStartOffsets(topics), messageHandler)
  }

  def commitConsumerOffsets(offsets: HasOffsetRanges): Unit = {
    val offsetsMap = {
      for {
        range <- offsets.offsetRanges if range.fromOffset < range.untilOffset
      } yield {
        logDebug(range.toString())
        TopicAndPartition(range.topic, range.partition) -> range.untilOffset
      }
    }.toMap

    kafkaHelper.commitConsumerOffsets(offsetsMap)
  }

  def commitConsumerOffset(range: OffsetRange): Unit = {
    if (range.fromOffset < range.untilOffset) {
      try {
        val tp = TopicAndPartition(range.topic, range.partition)
        logDebug("Committed offset " + range.untilOffset + " for topic " + tp)
        kafkaHelper.commitConsumerOffset(tp, range.untilOffset)
      } catch {
        case t: Throwable =>
          // log the failure, then rethrow so callers can react to it
          logWarning("exception during commitOffsets", t)
          throw t
      }
    }
  }

  def commitConsumerOffsets[R](stream: InputDStream[R]): Unit = {
    stream.foreachRDD { rdd =>
      commitConsumerOffsets(rdd.asInstanceOf[HasOffsetRanges])
    }
  }
} 
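For reference, a typical call site for the helper above might look like the sketch below. It assumes String keys and values decoded with kafka.serializer.StringDecoder; the broker and ZooKeeper addresses, group id, and topic name are placeholders.

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.StreamHelper

val conf = new SparkConf().setAppName("stream-helper-example")
val ssc = new StreamingContext(conf, Seconds(10))

val kafkaParams = Map(
  "metadata.broker.list" -> "localhost:9092",  // placeholder broker list
  "zookeeper.connect"    -> "localhost:2181",  // placeholder ZooKeeper quorum
  "group.id"             -> "example-group",
  "auto.offset.reset"    -> "smallest"
)

val helper = StreamHelper(kafkaParams)
val stream = helper.createStream[String, String, StringDecoder, StringDecoder](ssc, Set("events"))

stream.foreachRDD { rdd => /* process the (key, value) pairs */ }
helper.commitConsumerOffsets(stream)   // commit offsets back to ZooKeeper after each batch

ssc.start()
ssc.awaitTermination()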
Example 2
Source File: CheckpointedDirectKafkaInputDStream.scala    From streamliner-examples   with Apache License 2.0
package org.apache.spark.streaming.kafka

// NOTE: this excerpt is truncated on the source page; the imports, the class
// declaration, and the beginning of compute() are not shown.

    prevOffsets = currentOffsets
    currentOffsets = untilOffsets.map(kv => kv._1 -> kv._2.offset)

    if (prevOffsets == currentOffsets) None else Some(rdd)
  }

  def getCurrentOffsets(): Map[TopicAndPartition, Long] = currentOffsets
  def setCurrentOffsets(offsets: Map[TopicAndPartition, Long]): Unit = {
    currentOffsets = offsets
  }
} 
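Only the tail of the class survives in this excerpt, but the two accessors make its purpose clear: they expose the stream's offsets so an application can persist them externally and restore them after a restart. A hedged sketch, assuming stream is an already-constructed CheckpointedDirectKafkaInputDStream (its constructor is not shown above), ssc is the enclosing StreamingContext, and saveOffsets/loadOffsets are hypothetical persistence helpers of your own:

import kafka.common.TopicAndPartition

// hypothetical persistence helpers (e.g. backed by ZooKeeper or a database)
def saveOffsets(offsets: Map[TopicAndPartition, Long]): Unit = ???
def loadOffsets(): Map[TopicAndPartition, Long] = ???

stream.setCurrentOffsets(loadOffsets())        // restore offsets before starting the context
ssc.start()

// later, e.g. from a StreamingListener or a foreachRDD hook:
saveOffsets(stream.getCurrentOffsets())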
Example 3
Source File: AvroDecoder.scala    From cuesheet   with Apache License 2.0
package com.kakao.cuesheet.convert

import java.util.Arrays.copyOfRange

import kafka.serializer.Decoder
import kafka.utils.VerifiableProperties
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}


sealed trait AvroDecoder[T] extends Decoder[T] {

  def props: VerifiableProperties

  protected val schema = new Schema.Parser().parse(props.getString(Avro.SCHEMA))
  protected val skipBytes = props.getInt(Avro.SKIP_BYTES, 0)

  protected val reader = new GenericDatumReader[GenericRecord](schema)
  protected val decoder = Avro.recordDecoder(reader)

  private def skip(bytes: Array[Byte], size: Int): Array[Byte] = {
    val length = bytes.length
    length - size match {
      case remaining if remaining > 0 => copyOfRange(bytes, size, length)
      case _ => new Array[Byte](0)
    }
  }

  def parse(bytes: Array[Byte]): GenericRecord = {
    val data = if (skipBytes == 0) bytes else skip(bytes, skipBytes)
    decoder(data)
  }
}

class AvroRecordDecoder(val props: VerifiableProperties) extends AvroDecoder[GenericRecord] {
  override def fromBytes(bytes: Array[Byte]): GenericRecord = parse(bytes)
}

class AvroMapDecoder(val props: VerifiableProperties) extends AvroDecoder[Map[String, Any]] {
  override def fromBytes(bytes: Array[Byte]): Map[String, Any] = Avro.toMap(parse(bytes))
}

class AvroJsonDecoder(val props: VerifiableProperties) extends AvroDecoder[String] {
  override def fromBytes(bytes: Array[Byte]): String = Avro.toJson(parse(bytes))
} 
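As a usage sketch, the decoders above are configured through VerifiableProperties: the writer schema is supplied under the project's Avro.SCHEMA key, with an optional Avro.SKIP_BYTES prefix length. The schema and the avroBytes payload below are illustrative placeholders, and the call site is assumed to import com.kakao.cuesheet.convert._:

import java.util.Properties

import kafka.utils.VerifiableProperties

// illustrative writer schema; any Avro record schema works here
val schemaJson =
  """{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}"""

val props = new Properties()
props.setProperty(Avro.SCHEMA, schemaJson)   // property key defined by the project's Avro helper

val decoder = new AvroRecordDecoder(new VerifiableProperties(props))
val record = decoder.fromBytes(avroBytes)    // avroBytes: Array[Byte], a serialized User record (placeholder)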
Example 4
Source File: KafkaConsumerActor.scala    From coral   with Apache License 2.0
package io.coral.actors.connector

import java.util.Properties

import akka.actor.Props
import io.coral.actors.CoralActor
import io.coral.actors.connector.KafkaConsumerActor.{StopReadingMessageQueue, ReadMessageQueue}
import io.coral.lib.{ConfigurationBuilder, KafkaJsonConsumer}
import kafka.serializer.Decoder
import kafka.tools.MessageFormatter
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.json4s.JsonAST.{JNothing, JObject, JValue}

object KafkaConsumerActor {
	case class ReadMessageQueue()
	case class StopReadingMessageQueue()

	implicit val formats = org.json4s.DefaultFormats
	val builder = new ConfigurationBuilder("kafka.consumer")

	def getParams(json: JValue) = {
		for {
			kafka <- (json \ "params" \ "kafka").extractOpt[JObject]
			topic <- (json \ "params" \ "topic").extractOpt[String]
		} yield {
			val properties = consumerProperties(kafka)
			(properties, topic)
		}
	}

	def consumerProperties(json: JObject): Properties = {
		val properties = builder.properties

		json.values.foreach {
			case (k: String, v: String) =>
				properties.setProperty(k, v)
		}

		properties
	}

	def apply(json: JValue): Option[Props] = {
		getParams(json).map(_ => Props(classOf[KafkaConsumerActor], json, KafkaJsonConsumer()))
	}

	def apply(json: JValue, decoder: Decoder[JValue]): Option[Props] = {
		getParams(json).map(_ => Props(classOf[KafkaConsumerActor], json, KafkaJsonConsumer(decoder)))
	}
}

class KafkaConsumerActor(json: JObject, connection: KafkaJsonConsumer) extends CoralActor(json) {
	val (properties, topic) = KafkaConsumerActor.getParams(json).get
	lazy val stream = connection.stream(topic, properties)
	var shouldStop = false

	override def preStart(): Unit = {
		super.preStart()
	}

	override def receiveExtra: Receive = {
		case ReadMessageQueue() if stream.hasNextInTime =>
			val message: JValue = stream.next
			stream.commitOffsets

			if (message != JNothing) {
				emit(message)
			}

			if (!shouldStop) {
				self ! ReadMessageQueue()
			}
		case ReadMessageQueue() =>
			self ! ReadMessageQueue()
		case StopReadingMessageQueue() =>
			shouldStop = true
	}

	// (any remaining members of the actor are omitted in this excerpt)
}
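To wire the actor up, the companion's apply builds Props from a JSON configuration in which params.kafka carries consumer properties and params.topic names the topic to read. A hedged sketch follows; the actor system name, addresses, and topic are placeholders, and the surrounding Coral runtime is assumed to be available:

import akka.actor.ActorSystem
import io.coral.actors.connector.KafkaConsumerActor
import org.json4s.jackson.JsonMethods._

val system = ActorSystem("coral-example")

val config = parse("""{
  "params": {
    "kafka": { "zookeeper.connect": "localhost:2181", "group.id": "coral-example" },
    "topic": "events"
  }
}""")

// apply returns Option[Props]; None means the params block was incomplete
val consumer = KafkaConsumerActor(config).map(system.actorOf(_))
consumer.foreach(_ ! KafkaConsumerActor.ReadMessageQueue())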
Example 5
Source File: KafkaJsonConsumer.scala    From coral   with Apache License 2.0
package io.coral.lib

import java.util.Properties

import com.fasterxml.jackson.core.JsonParseException
import kafka.consumer._
import kafka.serializer.{Decoder, DefaultDecoder}
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._

object KafkaJsonConsumer {
	def apply() = new KafkaJsonConsumer(JsonDecoder)
	def apply(decoder: Decoder[JValue]) = new KafkaJsonConsumer(decoder)
}

class KafkaJsonConsumer(decoder: Decoder[JValue]) {
	def stream(topic: String, properties: Properties): KafkaJsonStream = {
		val connection = Consumer.create(new ConsumerConfig(properties))
		val stream = connection.createMessageStreamsByFilter(
			Whitelist(topic), 1, new DefaultDecoder, decoder)(0)
		new KafkaJsonStream(connection, stream)
	}
}

class KafkaJsonStream(connection: ConsumerConnector, stream: KafkaStream[Array[Byte], JValue]) {
	private lazy val it = stream.iterator

	// this method relies on a timeout value having been set
	@inline def hasNextInTime: Boolean =
		try {
			it.hasNext
		} catch {
			case cte: ConsumerTimeoutException => false
		}

	@inline def next: JValue = it.next.message
	@inline def commitOffsets = connection.commitOffsets
}

object JsonDecoder extends Decoder[JValue] {
	val encoding = "UTF8"

	override def fromBytes(bytes: Array[Byte]): JValue = {
		val s = new String(bytes, encoding)
		try {
			parse(s)
		} catch {
			case jpe: JsonParseException => JNothing
		}
	}
}
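A short usage sketch for the consumer above: the ZooKeeper address, group id, and topic are placeholders, and consumer.timeout.ms is set because hasNextInTime relies on a consumer timeout being configured.

import java.util.Properties

import io.coral.lib.KafkaJsonConsumer
import org.json4s.JsonAST.JNothing

val properties = new Properties()
properties.setProperty("zookeeper.connect", "localhost:2181")  // placeholder address
properties.setProperty("group.id", "json-example")
properties.setProperty("consumer.timeout.ms", "500")           // required for hasNextInTime

val stream = KafkaJsonConsumer().stream("events", properties)  // "events" is a placeholder topic

while (stream.hasNextInTime) {
  val json = stream.next
  stream.commitOffsets
  if (json != JNothing) {
    // process the parsed message; JNothing signals an unparsable payload
  }
}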