org.apache.kafka.clients.consumer.KafkaConsumer Scala Examples

The following examples show how to use org.apache.kafka.clients.consumer.KafkaConsumer. Each example notes its source file, project, and license.
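Before the project examples, here is a minimal self-contained sketch of the typical subscribe/poll/close loop. The broker address, topic name, and group id below are placeholders, not taken from any of the projects listed.

import java.time.Duration
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._

object MinimalConsumerSketch extends App {
  // Placeholder connection settings; adjust for your cluster.
  val props = new Properties()
  props.put("bootstrap.servers", "localhost:9092")
  props.put("group.id", "minimal-consumer-sketch")
  props.put("auto.offset.reset", "earliest")

  val consumer = new KafkaConsumer[String, String](props, new StringDeserializer, new StringDeserializer)
  try {
    consumer.subscribe(Collections.singletonList("example-topic"))
    // Poll a few times, print whatever arrives, then shut down.
    for (_ <- 1 to 10) {
      val records = consumer.poll(Duration.ofMillis(500)).asScala
      records.foreach(r => println(s"offset=${r.offset()} key=${r.key()} value=${r.value()}"))
    }
  } finally {
    consumer.close()
  }
}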
Example 1
Source File: KafkaTopicInfo.scala    From matcher   with MIT License
package tools

import java.io.File

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory
import com.wavesplatform.dex.queue.KafkaMatcherQueue.eventDeserializer
import com.wavesplatform.dex.queue.{QueueEvent, QueueEventWithMeta}
import com.wavesplatform.dex.settings.toConfigOps
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration.DurationInt

object KafkaTopicInfo extends App {
  implicit val system: ActorSystem = ActorSystem()

  val configFile = new File(args(0))
  val topic      = args(1)
  val from       = args(2).toLong
  val max        = args(3).toInt

  println(s"""configFile: ${configFile.getAbsolutePath}
             |topic: $topic
             |from: $from
             |max: $max""".stripMargin)

  val requestTimeout = java.time.Duration.ofNanos(5.seconds.toNanos)

  val config = ConfigFactory
    .parseString("""waves.dex.events-queue.kafka.consumer.client {
                   |  client.id = "kafka-topics-info"
                   |  enable.auto.commit = false
                   |  auto.offset.reset = earliest
                   |}
                   |
                   |""".stripMargin)
    .withFallback {
      ConfigFactory
        .parseFile(configFile)
        .withFallback(ConfigFactory.defaultApplication())
        .withFallback(ConfigFactory.defaultReference())
        .resolve()
        .getConfig("waves.dex.events-queue.kafka")
    }

  val consumer = new KafkaConsumer[String, QueueEvent](
    config.getConfig("waves.dex.events-queue.kafka.consumer.client").toProperties,
    new StringDeserializer,
    eventDeserializer
  )

  try {
    val topicPartition  = new TopicPartition(topic, 0)
    val topicPartitions = java.util.Collections.singletonList(topicPartition)
    consumer.assign(topicPartitions)

    {
      val r = consumer.partitionsFor(topic, requestTimeout)
      println(s"Partitions:\n${r.asScala.mkString("\n")}")
    }

    {
      val r = consumer.endOffsets(topicPartitions, requestTimeout)
      println(s"End offsets for $topicPartition: ${r.asScala.mkString(", ")}")
    }

    consumer.seek(topicPartition, from)

    val pollDuration = java.time.Duration.ofNanos(1.seconds.toNanos)
    val lastOffset    = from + max
    var continue      = true
    while (continue) {
      println(s"Reading from Kafka")

      val xs = consumer.poll(pollDuration).asScala.toVector
      xs.foreach { msg =>
        println(QueueEventWithMeta(msg.offset(), msg.timestamp(), msg.value()))
      }

      xs.lastOption.foreach { x =>
        if (x.offset() == lastOffset) continue = false
      }
    }
  } finally {
    consumer.close()
  }
} 
Example 2
Source File: MessageListener.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.client

import java.time.Duration
import java.util.Properties

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.ByteArrayDeserializer


class MessageListener[K, V](
                             brokers: String,
                             topic: String,
                             group: String,
                             keyDeserealizer: String,
                             valueDeserealizer: String,
                             processor: RecordProcessorTrait[K, V]) extends Runnable {

  import MessageListener._

  import scala.collection.JavaConverters._

  val consumer = new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserealizer, valueDeserealizer))
  consumer.subscribe(Seq(topic).asJava)
  var completed = false

  def complete(): Unit = {
    completed = true
  }

  override def run(): Unit = {
    while (!completed) {
      val records = consumer.poll(Duration.ofMillis(100)).asScala
      for (record <- records) {
        processor.processRecord(record)
      }
    }
    consumer.close()
    System.out.println("Listener completes")
  }

  def start(): Unit = {
    val t = new Thread(this)
    t.start()
  }
} 
Example 3
Source File: CachedKafkaConsumer.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

import org.apache.spark.{SparkEnv, SparkException, TaskContext}
import org.apache.spark.internal.Logging



  // Excerpt from the CachedKafkaConsumer companion object: the `cache` map and the
  // `CacheKey` case class referenced below are defined earlier in the full source file.
  def getOrCreate(
      topic: String,
      partition: Int,
      kafkaParams: ju.Map[String, Object]): CachedKafkaConsumer = synchronized {
    val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
    val topicPartition = new TopicPartition(topic, partition)
    val key = CacheKey(groupId, topicPartition)

    // If this is a reattempt at running the task, then invalidate the cache and start with
    // a new consumer
    if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) {
      cache.remove(key)
      new CachedKafkaConsumer(topicPartition, kafkaParams)
    } else {
      if (!cache.containsKey(key)) {
        cache.put(key, new CachedKafkaConsumer(topicPartition, kafkaParams))
      }
      cache.get(key)
    }
  }
} 
Example 4
Source File: MessageListener.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0
package com.lightbend.scala.kafka

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.ByteArrayDeserializer

import scala.collection.JavaConverters._


object MessageListener {
  private val AUTOCOMMITINTERVAL = "1000" // Frequency of offset commits
  private val SESSIONTIMEOUT = "30000"    // The timeout used to detect failures - should be greater than the processing time
  private val MAXPOLLRECORDS = "10"       // Max number of records consumed in a single poll

  def consumerProperties(brokers: String, group: String, keyDeserealizer: String, valueDeserealizer: String): Map[String, Object] = {
    Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
      ConsumerConfig.GROUP_ID_CONFIG -> group,
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true",
      ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> AUTOCOMMITINTERVAL,
      ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG -> SESSIONTIMEOUT,
      ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> MAXPOLLRECORDS,
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> keyDeserealizer,
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> valueDeserealizer
    )
  }

  def apply[K, V](brokers: String, topic: String, group: String,
                  processor: RecordProcessorTrait[K, V]): MessageListener[K, V] =
    new MessageListener[K, V](brokers, topic, group, classOf[ByteArrayDeserializer].getName, classOf[ByteArrayDeserializer].getName, processor)
}

class MessageListener[K, V](brokers: String, topic: String, group: String, keyDeserealizer: String, valueDeserealizer: String,
                            processor: RecordProcessorTrait[K, V]) extends Runnable {

  import MessageListener._

  val consumer = new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserealizer, valueDeserealizer).asJava)
  consumer.subscribe(Seq(topic).asJava)
  var completed = false

  def complete(): Unit = {
    completed = true
  }

  override def run(): Unit = {
    while (!completed) {
      val records = consumer.poll(100)
      for (record <- records.asScala) {
        processor.processRecord(record)
      }
    }
    consumer.close()
    System.out.println("Listener completes")
  }

  def start(): Unit = {
    val t = new Thread(this)
    t.start()
  }
} 
Example 5
Source File: Consumer.scala    From fusion-data   with Apache License 2.0
package kafkasample.demo

import java.util.{ Collections, Properties }
import java.util.concurrent.TimeUnit

import org.apache.kafka.clients.consumer.KafkaConsumer

object Consumer {
  @volatile private var isStop = false

  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("group.id", "CountryCounter")
    val consumer = new KafkaConsumer[String, String](props)
    val thread = new Thread() {
      override def run(): Unit = Consumer.run(consumer)
    }
    try {
      thread.start()
    } finally {
      TimeUnit.SECONDS.sleep(50)
      isStop = true
      thread.join()
      consumer.close()
    }
  }

  private def run(consumer: KafkaConsumer[String, String]): Unit = {
    consumer.subscribe(Collections.singleton("customerCountries"))
    while (!isStop && !Thread.currentThread().isInterrupted) {
      val records = consumer.poll(java.time.Duration.ofMillis(100))
      records.forEach { record =>
        println(s"topic = ${record.topic()}, partition = ${record.partition()}, offset = ${record
          .offset()}, key: ${record.key()}, value = ${record.value()}")
      }
      consumer.commitAsync()
    }
  }
} 
Example 6
Source File: ConsumerStrategy.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.{util => ju}

import scala.collection.JavaConverters._

import org.apache.kafka.clients.consumer.{Consumer, KafkaConsumer}
import org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener
import org.apache.kafka.common.TopicPartition


// Excerpt: only SubscribePatternStrategy is shown here; the ConsumerStrategy trait it
// implements (and the other strategies) are omitted from this snippet.
case class SubscribePatternStrategy(topicPattern: String) extends ConsumerStrategy {
  override def createConsumer(
      kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = {
    val consumer = new KafkaConsumer[Array[Byte], Array[Byte]](kafkaParams)
    consumer.subscribe(
      ju.regex.Pattern.compile(topicPattern),
      new NoOpConsumerRebalanceListener())
    consumer
  }

  override def toString: String = s"SubscribePattern[$topicPattern]"
} 
Example 7
Source File: KafkaClient.scala    From mist   with Apache License 2.0
package io.hydrosphere.mist.master.interfaces.async.kafka

import java.util.UUID
import java.util.concurrent.atomic.AtomicBoolean

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}

class TopicProducer[K, V](
  producer: KafkaProducer[K, V],
  topic: String
) {

  def send(key:K, value: V): Unit = {
    val record = new ProducerRecord(topic, key, value)
    producer.send(record)
  }
  def close(): Unit = {
    producer.close()
  }

}

object TopicProducer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicProducer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")

    val producer = new KafkaProducer(props, new StringSerializer, new StringSerializer)
    new TopicProducer(producer, topic)
  }
}

class TopicConsumer[K, V](
  consumer: KafkaConsumer[K, V],
  topic: String,
  timeout: Long = 100
) {

  private val promise = Promise[Unit]
  private val stopped = new AtomicBoolean(false)

  def subscribe(f: (K, V) => Unit): Future[Unit] = {
    run(f)
    promise.future
  }

  private def run(f: (K, V) => Unit): Unit = {
    consumer.subscribe(Seq(topic).asJava)
    val thread = new Thread(new Runnable {
      override def run(): Unit = {
        while (!stopped.get()) {
          val records = consumer.poll(timeout).asScala
          records.foreach(r => f(r.key(), r.value()))
        }
        promise.success(())
      }
    })
    thread.setName(s"kafka-topic-consumer-$topic")
    thread.start()
  }

  def close(): Future[Unit] = {
    stopped.set(true)
    promise.future
  }
}

object TopicConsumer {

  def apply(
    host: String,
    port: Int,
    topic: String): TopicConsumer[String, String] = {

    val props = new java.util.Properties()
    props.put("bootstrap.servers", s"$host:$port")
    props.put("group.id", "mist-" + UUID.randomUUID().toString)
    props.put("enable.auto.commit", "true")
    props.put("auto.commit.interval.ms", "1000")
    props.put("session.timeout.ms", "30000")

    val consumer = new KafkaConsumer(props, new StringDeserializer, new StringDeserializer)
    new TopicConsumer(consumer, topic)
  }

} 
Example 8
Source File: AvroMessageFormatterMain.scala    From affinity   with Apache License 2.0
import java.time.temporal.ChronoUnit
import java.util.Properties

import io.amient.affinity.kafka.AvroMessageFormatter
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.serialization.ByteArrayDeserializer

import scala.collection.JavaConverters._

object AvroMessageFormatterMain extends App {

  val consumerProps = new Properties()
  consumerProps.put("bootstrap.servers", args(0))
  consumerProps.put("security.protocol", "SASL_SSL")
  consumerProps.put("sasl.mechanism", "PLAIN")
  consumerProps.put("client.id", "console")
  consumerProps.put("group.id", "console")
  consumerProps.put("key.deserializer", classOf[ByteArrayDeserializer])
  consumerProps.put("value.deserializer", classOf[ByteArrayDeserializer])
  consumerProps.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required username='" + args(1) + "' password='" + args(2) + "';")
  consumerProps.put("schema.registry.url", args(3))
  val c = new KafkaConsumer[Array[Byte], Array[Byte]](consumerProps)
  c.subscribe(List(args(4)).asJava)
  val formatter = new AvroMessageFormatter()
  formatter.init(consumerProps)
  while (true) {
    c.poll(java.time.Duration.of(1, ChronoUnit.SECONDS)).asScala.foreach {
      record => formatter.writeTo(record, System.out)
    }
  }

} 
Example 9
Source File: CreateConsumerJ.scala    From skafka   with MIT License
package com.evolutiongaming.skafka.consumer

import com.evolutiongaming.catshelper.ToTry
import com.evolutiongaming.skafka.Converters._
import com.evolutiongaming.skafka.{Blocking, FromBytes}
import org.apache.kafka.clients.consumer.KafkaConsumer

object CreateConsumerJ {

  def apply[F[_] : ToTry, K, V](
    config: ConsumerConfig,
    fromBytesK: FromBytes[F, K],
    fromBytesV: FromBytes[F, V],
    blocking: Blocking[F]
  ): F[KafkaConsumer[K, V]] = {

    val deserializerK = fromBytesK.asJava
    val deserializerV = fromBytesV.asJava
    blocking { new KafkaConsumer(config.properties, deserializerK, deserializerV) }
  }
} 
Example 10
Source File: BasicConsumerExample.scala    From kafka_training   with Apache License 2.0
package com.malaska.kafka.training

import java.util
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition


object BasicConsumerExample {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")

    println("Creating Consumer")
    val consumer = new KafkaConsumer[String,String](props)

    val listener = new RebalanceListener

    consumer.subscribe(Collections.singletonList(topic), listener)


    println("Starting Consumer")
    while (true) {
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        val record = it.next()
        println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset())
      }
    }
  }
}

class RebalanceListener extends ConsumerRebalanceListener {
  override def onPartitionsAssigned(collection: util.Collection[TopicPartition]): Unit = {
    print("Assigned Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }

  override def onPartitionsRevoked(collection: util.Collection[TopicPartition]): Unit = {
    print("Revoked Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }
} 
Example 11
Source File: LatencyConsumerExample.scala    From kafka_training   with Apache License 2.0
package com.malaska.kafka.training


import java.util
import java.util.{Collections, Properties}

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}

import scala.collection.mutable

object LatencyConsumerExample {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")


    println("Creating Consumer")
    val consumer = new KafkaConsumer[String,String](props)
    consumer.subscribe(Collections.singletonList(topic))

    implicit val formats = DefaultFormats

    var maxLatency = 0L
    var minLatency = 100000L
    var latencyN = 0f
    var latencyCount = 0L
    val lastNLatencies = new mutable.MutableList[Long]

    println("Starting Consumer")
    while (true) {
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        val record = it.next()
        val exampleMessage = read[ExampleMessage](record.value())
        val latency = System.currentTimeMillis() - exampleMessage.sentTime
        maxLatency = Math.max(latency, maxLatency)
        minLatency = Math.min(latency, minLatency)
        latencyN += latency
        latencyCount += 1
        lastNLatencies += latency


        if (latencyCount % 10 == 0) {
          println("MessageCount:" + latencyCount +
            ",MaxLatency:" + maxLatency +
            ",MinLatency:" + minLatency +
            ",AverageLatency:" + (latencyN/latencyCount) +
            ",LastN:(" + lastNLatencies.mkString(",") + ")")
          lastNLatencies.clear()
        }
      }
    }
  }
} 
Example 12
Source File: SimpleKafkaConsumer.scala    From remora   with MIT License
import java.util.Properties

import com.fasterxml.jackson.databind.KeyDeserializer
import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer}
import org.apache.kafka.common.serialization.Deserializer
import net.manub.embeddedkafka.Codecs.stringDeserializer
import net.manub.embeddedkafka.ConsumerExtensions._

class SimpleKafkaConsumer[K,V](consumerProps : Properties,
                               topic : String,
                               keyDeserializer: Deserializer[K],
                               valueDeserializer: Deserializer[V],
                               function : ConsumerRecords[K, V] => Unit,
                               poll : Long = 2000) {

  private var running = false

  private val consumer = new KafkaConsumer[K, V](consumerProps, keyDeserializer, valueDeserializer)


  private val thread = new Thread {
    import scala.collection.JavaConverters._

    override def run: Unit = {
      consumer.subscribe(List(topic).asJava)
      consumer.partitionsFor(topic)

      while (running) {
        val record: ConsumerRecords[K, V] = consumer.poll(poll)
        function(record)
      }
    }
  }

  def start(): Unit = {
    if(!running) {
      running = true
      thread.start()
    }
  }

  def stop(): Unit = {
    if(running) {
      running = false
      thread.join()
      consumer.close()
    }
  }
} 
Example 13
Source File: GabKafkaSpout.scala    From Raphtory   with Apache License 2.0
package com.raphtory.examples.gab.actors

import java.util
import java.util.Properties

import com.raphtory.core.components.Spout.SpoutTrait
import org.apache.kafka.clients.consumer.KafkaConsumer

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.duration.MILLISECONDS
import scala.concurrent.duration.SECONDS
import scala.util.Random
class GabKafkaSpout extends SpoutTrait {
  val x     = new Random().nextLong()
  val props = new Properties()
  props.put("bootstrap.servers", "moe.eecs.qmul.ac.uk:9092")
  props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
  props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
  props.put("auto.offset.reset", "earliest")
  props.put("group.id", "group" + x)
  val consumer: KafkaConsumer[String, String] = new KafkaConsumer[String, String](props)
  consumer.subscribe(util.Arrays.asList("gabResortedGraph"))

  protected def ProcessSpoutTask(message: Any): Unit = message match {
    case StartSpout => AllocateSpoutTask(Duration(1, MILLISECONDS), "newLine")
    case "newLine"  => consumeFromKafka()
    case _          => println("message not recognized!")
  }

  def consumeFromKafka() = {
    val record = consumer.poll(1000).asScala
    for (data <- record.iterator)
      sendTuple(data.value())
    AllocateSpoutTask(Duration(1, MILLISECONDS), "newLine")
  }
} 
Example 14
Source File: KafkaSpout.scala    From Raphtory   with Apache License 2.0
package com.raphtory.spouts

import java.util
import java.util.Properties

import com.raphtory.core.components.Spout.SpoutTrait
import org.apache.kafka.clients.consumer.KafkaConsumer

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.duration.MILLISECONDS
import scala.concurrent.duration.SECONDS
import java.util.concurrent.LinkedBlockingQueue

import akka.actor.Props
import com.raphtory.core.components.Router.RouterManager

import scala.concurrent.duration.Duration
import scala.concurrent.duration._
import com.raphtory.core.utils.SchedulerUtil

import scala.util.Random
class KafkaSpout extends SpoutTrait {
  println("Starting kafka")
  var kafkaServer     = System.getenv().getOrDefault("KAFKA_ADDRESS", "127.0.0.1").trim
  var kafkaIP     = System.getenv().getOrDefault("KAFKA_PORT", "9092").trim
  var offset     = System.getenv().getOrDefault("KAFKA_OFFSET", "earliest").trim
  val x     = new Random().nextLong()
  var groupID   = System.getenv().getOrDefault("KAFKA_GROUP", "group" + x).trim
  var topic   = System.getenv().getOrDefault("KAFKA_TOPIC", "sample_topic").trim
  var restart   = System.getenv().getOrDefault("RESTART_RATE", "10").trim

  val queue = new LinkedBlockingQueue[String]

  val props = new Properties()
  props.put("bootstrap.servers", s"$kafkaServer:$kafkaIP")
  props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
  props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
  props.put("auto.offset.reset", offset)
  props.put("group.id", groupID)
  val consumer: KafkaConsumer[String, String] = new KafkaConsumer[String, String](props)
  consumer.subscribe(util.Arrays.asList(topic))


  //val helper = context.system.actorOf(Props(new KafkaSpoutBackPressure(queue)), "Spout_Helper")

  protected def ProcessSpoutTask(message: Any): Unit = message match {
    case StartSpout => AllocateSpoutTask(Duration(1, MILLISECONDS), "newLine")
    case "newLine"  => consumeFromKafka()
    case _          => println("message not recognized!")
  }

  def consumeFromKafka() = {
    //println("Consuming")
    val record = consumer.poll(java.time.Duration.ofMillis(5000)).asScala
    for (data <- record.iterator) {
      sendTuple(data.value())
      //helper ! KafkaData(data.value())
    }
    AllocateSpoutTask(Duration(restart.toInt, MILLISECONDS), "newLine")
  }
}
case class KafkaData(data:String)
class KafkaSpoutBackPressure(queue:LinkedBlockingQueue[String]) extends SpoutTrait {
  var startingSpeed     = System.getenv().getOrDefault("STARTING_SPEED", "1000").trim.toInt
  var increaseBy        = System.getenv().getOrDefault("INCREASE_BY", "1000").trim.toInt
  override def preStart(): Unit = {
    super.preStart()
    SchedulerUtil.scheduleTask(initialDelay = 60 seconds, interval = 60 second, receiver = self, message = "increase")
  }
  override protected def ProcessSpoutTask(receivedMessage: Any): Unit = receivedMessage match {
    case StartSpout => AllocateSpoutTask(Duration(1, MILLISECONDS), "newLine")
    case KafkaData(data) => queue.put(data)
    case "newLine"  => consumeFromQueue
    case "increase"  => startingSpeed+=increaseBy
    case _          => println("message not recognized!")
  }
  def consumeFromQueue() = {
    for(i<-0 to startingSpeed/100){
      if(!queue.isEmpty) {
        sendTuple(queue.take())
      }
    }
    AllocateSpoutTask(Duration(10, MILLISECONDS), "newLine")
  }
} 
Example 15
Source File: ConsumerExtensions.scala    From embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.jdk.CollectionConverters._
import scala.concurrent.duration._
import scala.util.Try


    // Excerpt: getNextBatch is defined inside an implicit wrapper class around KafkaConsumer
    // provided by ConsumerExtensions; the enclosing class declaration is omitted from this snippet.
    private def getNextBatch[T](poll: FiniteDuration, topics: Seq[String])(
        implicit decoder: ConsumerRecord[K, V] => T
    ): Seq[T] =
      Try {
        consumer.subscribe(topics.asJava)
        topics.foreach(consumer.partitionsFor)
        val records = consumer.poll(duration2JavaDuration(poll))
        // use toList to force eager evaluation. toSeq is lazy
        records.iterator().asScala.toList.map(decoder(_))
      }.recover {
        case ex: KafkaException => throw new KafkaUnavailableException(ex)
      }.get
  }
} 
Example 16
Source File: ConsumerAccess.scala    From zio-kafka   with Apache License 2.0
package zio.kafka.consumer.internal

import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.errors.WakeupException
import org.apache.kafka.common.serialization.ByteArrayDeserializer
import zio._
import zio.blocking.{ blocking, Blocking }
import zio.kafka.consumer.ConsumerSettings
import zio.kafka.consumer.internal.ConsumerAccess.ByteArrayKafkaConsumer

import scala.jdk.CollectionConverters._

private[consumer] class ConsumerAccess(private[consumer] val consumer: ByteArrayKafkaConsumer, access: Semaphore) {
  def withConsumer[A](f: ByteArrayKafkaConsumer => A): RIO[Blocking, A] =
    withConsumerM[Any, A](c => ZIO(f(c)))

  def withConsumerM[R, A](f: ByteArrayKafkaConsumer => ZIO[R, Throwable, A]): ZIO[R with Blocking, Throwable, A] =
    access.withPermit(withConsumerNoPermit(f))

  private[consumer] def withConsumerNoPermit[R, A](
    f: ByteArrayKafkaConsumer => ZIO[R, Throwable, A]
  ): ZIO[R with Blocking, Throwable, A] =
    blocking(ZIO.effectSuspend(f(consumer))).catchSome {
      case _: WakeupException => ZIO.interrupt
    }.fork.flatMap(fib => fib.join.onInterrupt(ZIO.effectTotal(consumer.wakeup()) *> fib.interrupt))
}

private[consumer] object ConsumerAccess {
  type ByteArrayKafkaConsumer = KafkaConsumer[Array[Byte], Array[Byte]]

  def make(settings: ConsumerSettings) =
    for {
      access <- Semaphore.make(1).toManaged_
      consumer <- blocking {
                   ZIO {
                     new KafkaConsumer[Array[Byte], Array[Byte]](
                       settings.driverSettings.asJava,
                       new ByteArrayDeserializer(),
                       new ByteArrayDeserializer()
                     )
                   }
                 }.toManaged(c => blocking(access.withPermit(UIO(c.close(settings.closeTimeout.asJava)))))
    } yield new ConsumerAccess(consumer, access)
} 
Example 17
Source File: KafkaSinkTest.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.kafka

import java.util
import java.util.{Properties, UUID}

import io.eels.Row
import io.eels.datastream.DataStream
import io.eels.schema.{Field, StringType, StructType}
import net.manub.embeddedkafka.{EmbeddedKafka, EmbeddedKafkaConfig}
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.{Deserializer, Serializer}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.collection.JavaConverters._
import scala.util.Try

class KafkaSinkTest extends FlatSpec with Matchers with BeforeAndAfterAll {

  implicit val kafkaConfig = EmbeddedKafkaConfig(
    kafkaPort = 6001,
    zooKeeperPort = 6000
  )
  Try {
    EmbeddedKafka.start()
  }

  val schema = StructType(
    Field("name", StringType, nullable = true),
    Field("location", StringType, nullable = true)
  )

  val ds = DataStream.fromValues(
    schema,
    Seq(
      Vector("clint eastwood", UUID.randomUUID().toString),
      Vector("elton john", UUID.randomUUID().toString)
    )
  )

  "KafkaSink" should "support default implicits" ignore {

    val topic = "mytopic-" + System.currentTimeMillis()

    val properties = new Properties()
    properties.put("bootstrap.servers", s"localhost:${kafkaConfig.kafkaPort}")
    properties.put("group.id", "test")
    properties.put("auto.offset.reset", "earliest")

    val producer = new KafkaProducer[String, Row](properties, StringSerializer, RowSerializer)
    val sink = KafkaSink(topic, producer)

    val consumer = new KafkaConsumer[String, String](properties, StringDeserializer, StringDeserializer)
    consumer.subscribe(util.Arrays.asList(topic))

    ds.to(sink)
    producer.close()

    val records = consumer.poll(4000)
    records.iterator().asScala.map(_.value).toList shouldBe ds.collect.map {
      case Row(_, values) => values.mkString(",")
    }.toList
  }
}

object RowSerializer extends Serializer[Row] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
  override def serialize(topic: String, data: Row): Array[Byte] = data.values.mkString(",").getBytes
  override def close(): Unit = ()
}

object StringSerializer extends Serializer[String] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
  override def close(): Unit = ()
  override def serialize(topic: String, data: String): Array[Byte] = data.getBytes
}

object StringDeserializer extends Deserializer[String] {
  override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = ()
  override def close(): Unit = ()
  override def deserialize(topic: String, data: Array[Byte]): String = new String(data)
} 
Example 18
Source File: PulseKafkaConsumer.scala    From pulse   with Apache License 2.0
package io.phdata.pulse.logcollector

import java.util.{ Collections, Properties }

import org.apache.kafka.clients.consumer.KafkaConsumer
import com.typesafe.scalalogging.LazyLogging
import spray.json.JsonParser.ParsingException

import scala.collection.JavaConverters._
import spray.json._


class PulseKafkaConsumer(solrCloudStream: SolrCloudStream) extends JsonSupport with LazyLogging {
  val MAX_TIMEOUT = 100

  def read(consumerProperties: Properties, topic: String): Unit = {
    val consumer = new KafkaConsumer[String, String](consumerProperties)

    consumer.subscribe(Collections.singletonList(topic))

    while (true) {
      try {
        val records = consumer.poll(MAX_TIMEOUT)
        for (record <- records.asScala) {
          logger.trace("KAFKA: Consuming " + record.value() + " from topic: " + topic)
          val logEventMap = record.value().parseJson.convertTo[Map[String, String]]
          solrCloudStream.put(logEventMap.getOrElse("application", ""), logEventMap)
          // TODO: Add proper error handling when application isn't supplied
        }
      } catch {
        case p: ParsingException => logger.error("Error parsing message from kafka broker", p)
        case e: Exception        => logger.error("Error consuming messages from kafka broker", e)
      }
    }
  }
} 
Example 19
Source File: ConsumerExtensions.scala    From scalatest-embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.util.Try


    // Excerpt: as in Example 15, getNextBatch is defined inside an implicit wrapper class
    // around KafkaConsumer that this snippet omits.
    private def getNextBatch[T](poll: Long, topics: Seq[String])(
        implicit decoder: ConsumerRecord[K, V] => T): Seq[T] =
      Try {
        import scala.collection.JavaConverters._
        consumer.subscribe(topics.asJava)
        topics.foreach(consumer.partitionsFor)
        val records = consumer.poll(java.time.Duration.ofMillis(poll))
        // use toList to force eager evaluation. toSeq is lazy
        records.iterator().asScala.toList.map(decoder(_))
      }.recover {
        case ex: KafkaException => throw new KafkaUnavailableException(ex)
      }.get
  }

}