kafka.producer.KeyedMessage Scala Examples

The following examples show how to use kafka.producer.KeyedMessage. Each example is taken from an open-source project; the source file and originating project are noted above it.
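Before the project examples, here is a minimal, self-contained sketch of the two KeyedMessage constructors most of the examples below rely on: topic plus message (unkeyed), and topic plus key plus message. The broker address, topic name, and object name are placeholders for illustration only and are not taken from any of the projects below.

import java.util.Properties

import kafka.producer.{KeyedMessage, Producer, ProducerConfig}

object KeyedMessageQuickStart extends App {
  // Placeholder broker list and string serializer for the old Scala producer API.
  val props = new Properties()
  props.put("metadata.broker.list", "localhost:9092")
  props.put("serializer.class", "kafka.serializer.StringEncoder")
  props.put("request.required.acks", "1")

  val producer = new Producer[String, String](new ProducerConfig(props))

  // Constructor 1: topic + message, sent without a key.
  producer.send(new KeyedMessage[String, String]("example-topic", "hello"))

  // Constructor 2: topic + key + message; by default the key also drives partitioning.
  producer.send(new KeyedMessage[String, String]("example-topic", "user-1", "hello with key"))

  producer.close()
}

A third constructor, new KeyedMessage(topic, key, partKey, message), lets the partition key differ from the message key; Example 2 below uses it to route lines by a computed partition key while leaving the message key null.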
Example 1
Source File: WalLogStat.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.s2graph.loader.subscriber

import kafka.producer.KeyedMessage
import kafka.serializer.StringDecoder
import org.apache.s2graph.spark.spark.{WithKafka, SparkApp}
import org.apache.spark.streaming.Durations._
import org.apache.spark.streaming.kafka.HasOffsetRanges
import scala.collection.mutable.{HashMap => MutableHashMap}
import scala.language.postfixOps

object WalLogStat extends SparkApp with WithKafka {

  override def run() = {

    validateArgument("kafkaZkQuorum", "brokerList", "topics", "intervalInSec", "dbUrl", "statTopic")

    val kafkaZkQuorum = args(0)
    val brokerList = args(1)
    val topics = args(2)
    val intervalInSec = seconds(args(3).toLong)
    val dbUrl = args(4)
    val statTopic = args(5)


    val conf = sparkConf(s"$topics: ${getClass.getSimpleName}")
    val ssc = streamingContext(conf, intervalInSec)
    val sc = ssc.sparkContext

    val groupId = topics.replaceAll(",", "_") + "_stat"

    val kafkaParams = Map(
      "zookeeper.connect" -> kafkaZkQuorum,
      "group.id" -> groupId,
      "metadata.broker.list" -> brokerList,
      "zookeeper.connection.timeout.ms" -> "10000",
      "auto.offset.reset" -> "largest")

    val stream = getStreamHelper(kafkaParams).createStream[String, String, StringDecoder, StringDecoder](ssc, topics.split(",").toSet)
    val statProducer = getProducer[String, String](brokerList)

    stream.foreachRDD { (rdd, time) =>

      val offsets = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

      val ts = time.milliseconds

      val elements = rdd.mapPartitions { partition =>
        // set executor setting.
        val phase = System.getProperty("phase")
        GraphSubscriberHelper.apply(phase, dbUrl, "none", brokerList)
        partition.map { case (key, msg) =>
          GraphSubscriberHelper.g.elementBuilder.toGraphElement(msg) match {
            case Some(elem) =>
              val serviceName = elem.serviceName
              msg.split("\t", 7) match {
                case Array(_, operation, log_type, _, _, label, _*) =>
                  Seq(serviceName, label, operation, log_type).mkString("\t")
                case _ =>
                  Seq("no_service_name", "no_label", "no_operation", "parsing_error").mkString("\t")
              }
            case None =>
              Seq("no_service_name", "no_label", "no_operation", "no_element_error").mkString("\t")
          }
        }
      }

      val countByKey = elements.map(_ -> 1L).reduceByKey(_ + _).collect()
      val totalCount = countByKey.map(_._2).sum
      val keyedMessage = countByKey.map { case (key, value) =>
        new KeyedMessage[String, String](statTopic, s"$ts\t$key\t$value\t$totalCount")
      }

      statProducer.send(keyedMessage: _*)

      elements.mapPartitionsWithIndex { (i, part) =>
        // commit offset range
        val osr = offsets(i)
        getStreamHelper(kafkaParams).commitConsumerOffset(osr)
        Iterator.empty
      }.foreach {
        (_: Nothing) => ()
      }

    }

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 2
Source File: GraphToETLStreaming.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.s2graph.counter.loader.stream

import kafka.producer.KeyedMessage
import kafka.serializer.StringDecoder
import org.apache.s2graph.core.GraphUtil
import org.apache.s2graph.counter.config.S2CounterConfig
import org.apache.s2graph.counter.loader.config.StreamingConfig
import org.apache.s2graph.spark.config.S2ConfigFactory
import org.apache.s2graph.spark.spark.{WithKafka, SparkApp, HashMapParam}
import org.apache.spark.streaming.Durations._
import org.apache.spark.streaming.kafka.KafkaRDDFunctions.rddToKafkaRDDFunctions
import scala.collection.mutable
import scala.collection.mutable.{HashMap => MutableHashMap}

object GraphToETLStreaming extends SparkApp with WithKafka {
  lazy val config = S2ConfigFactory.config
  lazy val s2Config = new S2CounterConfig(config)
  lazy val className = getClass.getName.stripSuffix("$")
  lazy val producer = getProducer[String, String](StreamingConfig.KAFKA_BROKERS)

  override def run(): Unit = {
    validateArgument("interval", "topic")
    val (intervalInSec, topic) = (seconds(args(0).toLong), args(1))

    val groupId = buildKafkaGroupId(topic, "graph_to_etl")
    val kafkaParam = Map(
//      "auto.offset.reset" -> "smallest",
      "group.id" -> groupId,
      "metadata.broker.list" -> StreamingConfig.KAFKA_BROKERS,
      "zookeeper.connect" -> StreamingConfig.KAFKA_ZOOKEEPER,
      "zookeeper.connection.timeout.ms" -> "10000"
    )

    val conf = sparkConf(s"$topic: $className")
    val ssc = streamingContext(conf, intervalInSec)
    val sc = ssc.sparkContext

    val acc = sc.accumulable(MutableHashMap.empty[String, Long], "Throughput")(HashMapParam[String, Long](_ + _))

    
    val stream = getStreamHelper(kafkaParam).createStream[String, String, StringDecoder, StringDecoder](ssc, topic.split(',').toSet)
    stream.foreachRDD { rdd =>
      rdd.foreachPartitionWithOffsetRange { case (osr, part) =>
        val m = MutableHashMap.empty[Int, mutable.MutableList[String]]
        for {
          (k, v) <- part
          line <- GraphUtil.parseString(v)
        } {
          try {
            val sp = GraphUtil.split(line)
            // get partition key by target vertex id
            val partKey = getPartKey(sp(4), 20)
            val values = m.getOrElse(partKey, mutable.MutableList.empty[String])
            values += line
            m.update(partKey, values)
          } catch {
            case ex: Throwable =>
              log.error(s"$ex: $line")
          }
        }

        m.foreach { case (k, v) =>
          v.grouped(1000).foreach { grouped =>
            producer.send(new KeyedMessage[String, String](StreamingConfig.KAFKA_TOPIC_ETL, null, k, grouped.mkString("\n")))
          }
        }

        getStreamHelper(kafkaParam).commitConsumerOffset(osr)
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 3
Source File: KafkaSpanHandler.scala    From money   with Apache License 2.0
package com.comcast.money.kafka

import java.util.Properties

import com.comcast.money.api.SpanInfo
import com.comcast.money.core.handlers.ConfigurableHandler
import com.comcast.money.wire.AvroConversions
import com.typesafe.config.Config
import kafka.producer.{ ProducerConfig, KeyedMessage, Producer }

// We use the producer maker so that we can mock this out
trait ProducerMaker {
  def makeProducer(conf: Config): Producer[Array[Byte], Array[Byte]]
}

trait ConfigDrivenProducerMaker extends ProducerMaker {

  def makeProducer(conf: Config): Producer[Array[Byte], Array[Byte]] = {

    val props = new Properties()

    props.put("compression.codec", conf.getString("compression.codec"))
    props.put("producer.type", conf.getString("producer.type"))
    props.put("batch.num.messages", conf.getString("batch.num.messages"))
    props.put("message.send.max.retries", conf.getString("message.send.max.retries"))
    props.put("metadata.broker.list", conf.getString("metadata.broker.list"))

    new Producer[Array[Byte], Array[Byte]](new ProducerConfig(props))
  }
}

class KafkaSpanHandler extends ConfigurableHandler with ConfigDrivenProducerMaker {

  import AvroConversions._

  private[kafka] var topic: String = _
  private[kafka] var producer: Producer[Array[Byte], Array[Byte]] = _

  def configure(config: Config): Unit = {
    producer = makeProducer(config)
    topic = config.getString("topic")
  }

  def handle(span: SpanInfo): Unit = {
    producer.send(new KeyedMessage(topic, span.convertTo[Array[Byte]]))
  }
} 
Example 4
Source File: KafkaSpanHandlerSpec.scala    From money   with Apache License 2.0
package com.comcast.money.kafka

import com.comcast.money.api.Note
import com.comcast.money.{ api, core }
import com.typesafe.config.{ Config, ConfigFactory }
import kafka.message.{ CompressionCodec, GZIPCompressionCodec }
import kafka.producer.{ KeyedMessage, Producer }
import org.mockito.ArgumentCaptor
import org.mockito.Mockito._
import org.scalatest.mockito.MockitoSugar
import org.scalatest.{ BeforeAndAfterAll, Matchers, WordSpec }

import scala.collection.JavaConverters._

trait MockProducerMaker extends ProducerMaker {

  val mockProducer = mock(classOf[Producer[Array[Byte], Array[Byte]]])

  def makeProducer(conf: Config): Producer[Array[Byte], Array[Byte]] = mockProducer
}

class TestKafkaSpanHandler extends KafkaSpanHandler {

  var producerWasMade = false
  val mockProducer = mock(classOf[Producer[Array[Byte], Array[Byte]]])

  override def makeProducer(conf: Config): Producer[Array[Byte], Array[Byte]] = {
    producerWasMade = true
    mockProducer
  }
}

class KafkaSpanHandlerSpec extends WordSpec
  with Matchers
  with MockitoSugar
  with BeforeAndAfterAll {

  trait KafkaFixture {
    val testConfig = mock[Config]
    when(testConfig.getString("topic")).thenReturn("test-topic")

    val underTest = new TestKafkaSpanHandler()
    underTest.configure(testConfig)

    val testProducer = underTest.mockProducer
    val sampleData = core.CoreSpanInfo(
      id = new api.SpanId("foo", 1L),
      name = "key",
      appName = "app",
      host = "host",
      startTimeMillis = 1L,
      success = true,
      durationMicros = 35L,
      notes = Map[String, Note[_]]("what" -> api.Note.of("what", 1L), "when" -> api.Note.of("when", 2L), "bob" -> api.Note.of("bob", "craig")).asJava)
  }

  "A KafkaEmitter" should {
    "make a producer in configure" in new KafkaFixture {
      underTest.producerWasMade shouldBe true
    }
    "send a message to the producer for a span" in new KafkaFixture {
      underTest.handle(sampleData)

      val captor = ArgumentCaptor.forClass(classOf[KeyedMessage[Array[Byte], Array[Byte]]])
      verify(testProducer).send(captor.capture())
    }
  }

  "A ConfigDrivenProducerMaker" should {
    "set the properties from the config" in {
      val config = ConfigFactory.parseString(
        """
          | topic = "money"
          | compression.codec = "1"
          | producer.type = "async"
          | batch.num.messages = "1"
          | message.send.max.retries = "3"
          | request.required.acks = "0"
          | metadata.broker.list = "localhost:9092"
        """.stripMargin)
      val testHandler = new KafkaSpanHandler()
      testHandler.configure(config)

      val producerConfig = testHandler.producer.config
      producerConfig.brokerList shouldBe "localhost:9092"
      producerConfig.compressionCodec shouldBe GZIPCompressionCodec
      producerConfig.producerType shouldBe "async"
      producerConfig.batchNumMessages shouldBe 1
      producerConfig.messageSendMaxRetries shouldBe 3
      producerConfig.requestRequiredAcks shouldBe 0
    }
  }
} 
Example 5
Source File: KafkaProducer.scala    From sparta   with Apache License 2.0
package com.stratio.kafka.benchmark.generator.kafka

import java.util.Properties

import com.typesafe.config.Config
import kafka.producer.{KeyedMessage, Producer, ProducerConfig}

object KafkaProducer {

  def getInstance(config: Config): Producer[String, String] = {
    val props: Properties = new Properties()
    props.put("metadata.broker.list", config.getString("brokerList"))
    props.put("serializer.class", "kafka.serializer.StringEncoder")
    props.put("request.required.acks", "1")

    val producerConfig = new ProducerConfig(props)
    new Producer[String, String](producerConfig)
  }

  def send(producer: Producer[String, String], topic: String, message: String): Unit = {
    val keyedMessage: KeyedMessage[String, String] = new KeyedMessage[String, String](topic, message)
    producer.send(keyedMessage)
  }
} 
Example 6
Source File: Actors.scala    From embedded-kafka   with Apache License 2.0
package com.tuplejump.embedded.kafka

import scala.reflect.ClassTag
import scala.util.Try
import akka.actor.Actor
import akka.actor.Props
import kafka.producer.{ProducerConfig, KeyedMessage, Producer}

// only string so far
//TODO error handling
class KafkaPublisher[K,V : ClassTag](producer: Producer[K,V]) extends Actor {

  override def postStop(): Unit = Try(producer.close())

  def receive: Actor.Receive = {
    case e: Events.PublishTo[V] => publish(e)
  }

  private def publish(e: Events.PublishTo[V]): Unit =
    producer.send(e.data.toArray.map { new KeyedMessage[K,V](e.topic, _) }: _*)

}


object KafkaPublisher {

  def props(producerConfig: ProducerConfig): Props = {
    val producer = new Producer[String,String](producerConfig)
    Props(new KafkaPublisher(producer))
  }
} 
Example 7
Source File: KafkaProducer.scala    From spark-ref-architecture   with Apache License 2.0
package com.stc.spark.streaming.kafka

import java.util.{Date, Properties}

import kafka.producer.{KeyedMessage, Producer, ProducerConfig}

import scala.util.Random

object KafkaProducer extends App {
  val events = args(0).toInt
  val topic = args(1)
  val brokers = args(2)
  val rnd = new Random()

  val producer = new Producer[String, String](KafkaConfig.config)
  val t = System.currentTimeMillis()
  for (nEvents <- Range(0, events)) {
    val runtime = new Date().getTime()
    val ip = "192.168.2." + rnd.nextInt(255)
    val msg = runtime + "," + nEvents + ",www.example.com," + ip
    val data = new KeyedMessage[String, String](topic, ip, msg)
    producer.send(data)
  }

  println("sent per second: " + events * 1000 / (System.currentTimeMillis() - t))
  producer.close()
} 
Example 8
Source File: UserBehaviorMsgProducer.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.examples.streaming.IBMKafkaStream

import scala.util.Random
import java.util.Properties
import kafka.producer.KeyedMessage
import kafka.producer.ProducerConfig
import kafka.producer.Producer

class UserBehaviorMsgProducer(brokers: String, topic: String) extends Runnable {
  private val brokerList = brokers
  private val targetTopic = topic
  private val props = new Properties()
  props.put("metadata.broker.list", this.brokerList)
  props.put("serializer.class", "kafka.serializer.StringEncoder")
  props.put("producer.type", "async")
  private val config = new ProducerConfig(this.props)
  private val producer = new Producer[String, String](this.config)

  private val PAGE_NUM = 100
  private val MAX_MSG_NUM = 3
  private val MAX_CLICK_TIME = 5
  private val MAX_STAY_TIME = 10
  //Like,1;Dislike -1;No Feeling 0
  private val LIKE_OR_NOT = Array[Int](1, 0, -1)

  def run(): Unit = {
    val rand = new Random()
    while (true) {
      //how many user behavior messages will be produced
      val msgNum = rand.nextInt(MAX_MSG_NUM) + 1
      try {
        //generate the message with format like page1|2|7.123|1
        //page ID|click count|stay time (minutes)|like or not
        //(page001.html, 1, 0.5, 1)
        //The first field is the page ID, the second is the number of clicks on that page between entering and leaving the site, the third is the stay time in minutes, and the fourth is the rating: 1 for like, -1 for dislike, 0 for neutral.
        for (i <- 0 to msgNum) {
          var msg = new StringBuilder()
          msg.append("page" + (rand.nextInt(PAGE_NUM) + 1))
          msg.append("|")
          msg.append(rand.nextInt(MAX_CLICK_TIME) + 1)
          msg.append("|")
          msg.append(rand.nextInt(MAX_CLICK_TIME) + rand.nextFloat())
          msg.append("|")
          msg.append(LIKE_OR_NOT(rand.nextInt(3)))
          println(msg.toString())
          //send the generated message to broker
          sendMessage(msg.toString())
        }
        println("%d user behavior messages produced.".format(msgNum+1))
      } catch {
        case e: Exception => println(e)
      }
      try {
        //sleep for 5 seconds after sending a micro batch of messages
        //every 5 seconds, push a random batch of 0 to 50 behavior data messages to the user-behavior-topic topic
        Thread.sleep(5000)
      } catch {
        case e: Exception => println(e)
      }
    }
  }
  def sendMessage(message: String) = {
    try {
      val data = new KeyedMessage[String, String](this.topic, message);
      producer.send(data);
    } catch {
      case e:Exception => println(e)
    }
  }
}
object UserBehaviorMsgProducerClient {
  def main(args: Array[String]) {

    //start the message producer thread
    val Array(zkServers,processingInterval) = Array("192.168.200.80:9092","topic")//args

    new Thread(new UserBehaviorMsgProducer(zkServers,processingInterval)).start()
  }
} 
Example 9
Source File: KafkaJsonProducer.scala    From coral   with Apache License 2.0
package io.coral.lib

import java.util.Properties

import io.coral.lib.KafkaJsonProducer.KafkaEncoder
import kafka.producer.{KeyedMessage, ProducerConfig, Producer}
import kafka.serializer.Encoder
import kafka.utils.VerifiableProperties
import org.json4s.JsonAST.{JObject, JValue}
import org.json4s.jackson.JsonMethods._

object KafkaJsonProducer {
	type KafkaEncoder = Encoder[JValue]
	def apply() = new KafkaJsonProducer(classOf[JsonEncoder])
	def apply[T <: KafkaEncoder](encoder: Class[T]) = new KafkaJsonProducer(encoder)
}

class KafkaJsonProducer[T <: KafkaEncoder](encoderClass: Class[T]) {
	def createSender(topic: String, properties: Properties): KafkaSender = {
		val props = properties.clone.asInstanceOf[Properties]
		props.put("serializer.class", encoderClass.getName)
		val producer = createProducer(props)
		new KafkaSender(topic, producer)
	}

	def createProducer(props: Properties): Producer[String, JValue] = {
		new Producer[String, JValue](new ProducerConfig(props))
	}
}

class KafkaSender(topic: String, producer: Producer[String, JValue]) {
	def send(key: Option[String], message: JObject) = {
		val keyedMessage: KeyedMessage[String, JValue] = key match {
			case Some(key) => new KeyedMessage(topic, key, message)
			case None => new KeyedMessage(topic, message)
		}

		producer.send(keyedMessage)
	}
}

class JsonEncoder(verifiableProperties: VerifiableProperties) extends KafkaEncoder {
	override def toBytes(value: JValue): Array[Byte] = {
		compact(value).getBytes("UTF-8")
	}
} 
Example 10
Source File: KafkaJsonProducerSpec.scala    From coral   with Apache License 2.0
package io.coral.lib

import java.util.Properties

import io.coral.lib.KafkaJsonProducer.KafkaEncoder
import kafka.utils.VerifiableProperties
import org.json4s.JsonAST.{JObject, JValue}
import org.scalatest.{Matchers, WordSpec}
import org.json4s.jackson.JsonMethods._
import kafka.producer.{ProducerConfig, KeyedMessage, Producer}
import org.mockito.{Mockito, ArgumentCaptor}
import org.mockito.Mockito._
import scala.collection.mutable

class KafkaJsonProducerSpec extends WordSpec with Matchers {
	"A KafkaJsonProducer" should {
		"create a KafkaJsonProducer with the JsonEncoder" in {
			val producer = KafkaJsonProducer()
			assert(producer.getClass == classOf[KafkaJsonProducer[JsonEncoder]])
		}

		"create a KafkaJsonProducer with the specified Encoder" in {
			val producer = KafkaJsonProducer(classOf[MyEncoder])
			assert(producer.getClass == classOf[KafkaJsonProducer[MyEncoder]])
		}

		"create a sender" in {
			val producer = new MyKafkaJsonProducer
			producer.createSender("topic", new Properties)
			val serializer = producer.receivedProperties.get("serializer.class")
			assert(serializer == classOf[MyEncoder].getName)
		}
	}

	"A KafkaSender" should {
		"send the JSON provided without a key to Kafka" in {
			val messageJson = """{"key1": "value1", "key2": "value2"}"""

			val keyedMessage = sendMessage(None, messageJson)

			assert(keyedMessage.topic == "test")
			assert(keyedMessage.hasKey == false)
			assert(keyedMessage.message == parse(messageJson))
		}

		"send the JSON provided with a key to Kafka" in {
			val messageJson = """{"key3": "value3", "key4": "value4"}"""

			val keyedMessage = sendMessage(Some("key"), messageJson)

			assert(keyedMessage.key == "key")
			assert(keyedMessage.topic == "test")
			assert(keyedMessage.message == parse(messageJson))
		}
	}

	"A JsonEncoder" should {
		"encode the provided json" in {
			val json = """{"key1": "value1"}"""
			val encoder = new JsonEncoder(new VerifiableProperties)
			val result = encoder.toBytes(parse(json))
			assert(parse(new String(result, "UTF-8")) == parse(json))
		}
	}

	private def sendMessage(key: Option[String], messageJson: String): KeyedMessage[String, JValue] = {
		val producer = Mockito.mock(classOf[Producer[String, JValue]])
		val sender = new KafkaSender("test", producer)
		sender.send(key, parse(messageJson).asInstanceOf[JObject])

		val argumentCaptor = ArgumentCaptor.forClass(classOf[KeyedMessage[String, JValue]])
		verify(producer).send(argumentCaptor.capture())

		val keyedMessages = argumentCaptor.getAllValues
		assert(keyedMessages.size == 1)

		// The following construction is necessary because capturing of parameters
		// with Mockito, Scala type inference, and multiple arguments
		// don't work together without explicit casts.
		keyedMessages.get(0).asInstanceOf[mutable.WrappedArray.ofRef[KeyedMessage[String, JValue]]](0)
	}
}

class MyEncoder(verifiableProperties: VerifiableProperties) extends KafkaEncoder {
	override def toBytes(value: JValue): Array[Byte] = {
		Array()
	}
}

class MyKafkaJsonProducer extends KafkaJsonProducer(classOf[MyEncoder]) {
	var receivedProperties: Properties = _

	override def createProducer(props: Properties): Producer[String, JValue] = {
		receivedProperties = props
		Mockito.mock(classOf[Producer[String, JValue]])
	}
}