org.apache.kafka.clients.consumer.ConsumerRecord Scala Examples

The following examples show how to use org.apache.kafka.clients.consumer.ConsumerRecord. Each example is drawn from an open-source project; the source file, project, and license are noted above each listing.
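As a quick orientation before the project examples, the sketch below (not taken from any of the projects listed here; the object name is illustrative) shows the ConsumerRecord accessors that most of the examples read: topic, partition, offset, timestamp, key and value.

import org.apache.kafka.clients.consumer.ConsumerRecord

object ConsumerRecordAccessors {
  // Summarises the metadata and payload carried by a single consumed record.
  def describe(record: ConsumerRecord[String, String]): String =
    s"topic=${record.topic()} partition=${record.partition()} offset=${record.offset()} " +
      s"timestamp=${record.timestamp()} key=${record.key()} value=${record.value()}"
}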
Example 1
Source File: BasicConsumerExample.scala    From kafka_training   with Apache License 2.0
package com.malaska.kafka.training

import java.util
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition


object BasicConsumerExample {
  def main(args: Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")

    println("Creating Consumer")
    val consumer = new KafkaConsumer[String,String](props)

    val listener = new RebalanceListener

    consumer.subscribe(Collections.singletonList(topic), listener)


    println("Starting Consumer")
    while (true) {
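      // poll(1000) blocks for up to 1000 ms waiting for records; newer clients prefer the poll(Duration) overload.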
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        val record = it.next()
        println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset())
      }
    }
  }
}

class RebalanceListener extends ConsumerRebalanceListener {
  override def onPartitionsAssigned(collection: util.Collection[TopicPartition]): Unit = {
    print("Assigned Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println()
  }

  override def onPartitionsRevoked(collection: util.Collection[TopicPartition]): Unit = {
    print("Revoked Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println()
  }
} 
Example 2
Source File: KafkaStreamingDemo.scala    From MaxCompute-Spark   with Apache License 2.0
package com.aliyun.odps.spark.examples.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaStreamingDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("KafkaStreamingDemo")
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(5))

    // Use OSS for checkpoint storage
    ssc.checkpoint("oss://bucket/checkpointDir/")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "192.168.1.1:9200,192.168.1.2:9200,192.168.1.3:9200",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val topics = Set("event_topic")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )


    val dstream = recordDstream.map(f => (f.key(), f.value()))
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    val wordAndOneDstream: DStream[(String, Int)] = wordsDStream.map((_, 1))
    val result: DStream[(String, Int)] = wordAndOneDstream.reduceByKey(_ + _)
    result.print()

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 3
Source File: Kafka2OdpsDemo.scala    From MaxCompute-Spark   with Apache License 2.0
package com.aliyun.odps.spark.examples.streaming.kafka

import com.aliyun.odps.spark.examples.streaming.common.SparkSessionSingleton
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object Kafka2OdpsDemo {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("test")
    val ssc = new StreamingContext(sparkConf, Seconds(10))

    // Use OSS for checkpoint storage and replace this with a valid OSS path. For the OSS access documentation see https://github.com/aliyun/MaxCompute-Spark/wiki/08.-Oss-Access%E6%96%87%E6%A1%A3%E8%AF%B4%E6%98%8E
    ssc.checkpoint("oss://bucket/checkpointdir")

    // Kafka configuration parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "testGroupId",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Create the Kafka DStream
    val topics = Set("test")
    val recordDstream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc,
        LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams)
      )
    val dstream = recordDstream.map(f => (f.key(), f.value()))
    // Parse the Kafka data and write it to ODPS
    val data: DStream[String] = dstream.map(_._2)
    val wordsDStream: DStream[String] = data.flatMap(_.split(" "))
    wordsDStream.foreachRDD(rdd => {
      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
      import spark.implicits._

      rdd.toDF("id").write.mode("append").saveAsTable("test_table")
    })

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 4
Source File: Codecs.scala    From embedded-kafka-schema-registry   with MIT License
package net.manub.embeddedkafka.schemaregistry

import org.apache.kafka.clients.consumer.ConsumerRecord

object Codecs {
  implicit def stringKeyGenericValueCrDecoder[V]
      : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key, cr.value)
  implicit def genericValueCrDecoder[V]: ConsumerRecord[String, V] => V =
    _.value
  implicit def stringKeyGenericValueTopicCrDecoder[V]
      : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic, cr.key, cr.value)
} 
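These implicit functions decode a ConsumerRecord into a plain value or tuple. Below is a minimal, hypothetical sketch of applying one of them; the topic, key, value and the object name are placeholders, not part of the original project.

import net.manub.embeddedkafka.schemaregistry.Codecs._
import org.apache.kafka.clients.consumer.ConsumerRecord

object CodecsUsageSketch extends App {
  // In real code the record would come out of a consumer poll rather than being built by hand.
  val record = new ConsumerRecord[String, String]("some-topic", 0, 0L, "some-key", "some-value")
  val decode: ConsumerRecord[String, String] => (String, String) = stringKeyGenericValueCrDecoder
  println(decode(record)) // prints (some-key,some-value)
}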
Example 5
Source File: Codecs.scala    From embedded-kafka-schema-registry   with MIT License
package net.manub.embeddedkafka.schemaregistry.avro

import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

@deprecated(
  "Avro-related classes will be removed soon",
  since = "5.5.0"
)
object Codecs {
  implicit def stringKeyAvroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key, cr.value)
  implicit def avroValueCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => V =
    _.value
  implicit def stringKeyAvroValueTopicCrDecoder[V <: SpecificRecord]
      : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic, cr.key, cr.value)

  implicit def stringKeyGenericValueCrDecoder
      : ConsumerRecord[String, GenericRecord] => (String, GenericRecord) =
    cr => (cr.key, cr.value)

  implicit def genericKeyGenericValueCrDecoder
      : ConsumerRecord[GenericRecord, GenericRecord] => (
          GenericRecord,
          GenericRecord
      ) =
    cr => (cr.key, cr.value)
} 
Example 6
Source File: RecordProcessor.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0
package com.lightbend.scala.kafka.client

import com.lightbend.scala.kafka.RecordProcessorTrait
import org.apache.kafka.clients.consumer.ConsumerRecord


class RecordProcessor extends RecordProcessorTrait[Array[Byte], Array[Byte]] {
  override def processRecord(record: ConsumerRecord[Array[Byte], Array[Byte]]): Unit = {
    RecordProcessor.count += 1
    val key = record.key()
    val value = record.value()
    println(s"Retrieved message #${RecordProcessor.count}: " +
      mkString("key", key) + ", " + mkString("value", value))
  }

  private def mkString(label: String, array: Array[Byte]) = {
    if (array == null) s"${label} = ${array}"
    else s"${label} = ${array}, size = ${array.size}, first 5 elements = ${array.take(5).mkString("[", ",", "]")}"
  }
}

object RecordProcessor {
  var count = 0L
} 
Example 7
Source File: PlainSourceConsumer.scala    From kafka-scala-api   with Apache License 2.0
package com.example.consumer

import java.util.concurrent.atomic.AtomicLong

import akka.Done
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import akka.stream.scaladsl.Sink
import com.example._
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object PlainSourceConsumer extends App {

  val db = new DB
  db.loadOffset().foreach { fromOffset =>
    val partition = 0
    val subscription = Subscriptions.assignmentWithOffset(
      new TopicPartition(topic, partition) -> fromOffset
    )

    val done =
      Consumer.plainSource(consumerSettings, subscription)
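        // Save each record through the DB mock one at a time, then discard the stream's output.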
        .mapAsync(1)(db.save)
        .runWith(Sink.ignore)
  }

}

//Zookeeper or DB storage mock
class DB {

  private val offset = new AtomicLong(2)

  def save(record: ConsumerRecord[Array[Byte], String]): Future[Done] = {
    println(s"DB.save: ${record.value}")
    offset.set(record.offset)
    Future.successful(Done)
  }

  def loadOffset(): Future[Long] =
    Future.successful(offset.get)

  def update(data: String): Future[Done] = {
    println(s"DB.update: $data")
    Future.successful(Done)
  }
} 
Example 8
Source File: KafkaStreamingLatestExample.scala    From kafka-scala-api   with Apache License 2.0
package com.example.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010._
import org.apache.spark.{SparkContext, TaskContext}

import scala.collection.JavaConversions._
import com.example._

object KafkaStreamingLatestExample {

  def main(args: Array[String]): Unit = {
    kafkaStream010Checkpointing()
  }

  
  def kafkaStream010Itself() =
    launchWithItself(kafkaStreaming010, appName = "Kafka010_DirectStream")

  private def kafkaStreaming010(streamingContext: StreamingContext): Unit = {
    val topics = Array("sample_topic")
    val stream = KafkaUtils.createDirectStream[String, String](
      streamingContext,
      PreferConsistent, // Distributes partitions evenly across available executors.
      Subscribe[String, String](topics, kafkaParams)
    )

    stream.map(record => (record.key, record.value)).print()

    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
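      // Each partition task looks up its own offset range via its partition id.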
      rdd.foreachPartition { _ =>
        val o: OffsetRange = offsetRanges(TaskContext.get.partitionId)
        println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}")
      }
    }

    storingOffsetsItself(stream)
  }

  private def storingOffsetsItself(stream: InputDStream[ConsumerRecord[String, String]]) = {
    stream.foreachRDD { rdd =>
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }
  }

  private def kafkaRdd010() = {
    val sparkContext = new SparkContext("local[*]", "kafkaRdd010")

    val offsetRanges = Array(
      // topic, partition, inclusive starting offset, exclusive ending offset
      OffsetRange("sample_topic", 0, 10, 20),
      OffsetRange("sample_topic", 1, 10, 20)
    )
    val params = new ju.HashMap[String, Object](kafkaParams)
    val kafkaRDD =  KafkaUtils.createRDD[String, String](sparkContext, params , offsetRanges, PreferConsistent)
    println(kafkaRDD.map(_.value()).first())
  }

} 
Example 9
Source File: FlinkKafkaCodecSerde.scala    From cloudflow   with Apache License 2.0
package cloudflow.flink

import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.streaming.connectors.kafka._

import cloudflow.streamlets.{ CodecInlet, CodecOutlet }

private[flink] class FlinkKafkaCodecSerializationSchema[T: TypeInformation](outlet: CodecOutlet[T], topic: String)
    extends KafkaSerializationSchema[T] {
  override def serialize(value: T, timestamp: java.lang.Long): ProducerRecord[Array[Byte], Array[Byte]] =
    new ProducerRecord(topic, outlet.codec.encode(value))
}

private[flink] class FlinkKafkaCodecDeserializationSchema[T: TypeInformation](inlet: CodecInlet[T]) extends KafkaDeserializationSchema[T] {
  override def deserialize(record: ConsumerRecord[Array[Byte], Array[Byte]]): T = inlet.codec.decode(record.value)
  override def isEndOfStream(value: T): Boolean                                 = false
  override def getProducedType: TypeInformation[T]                              = implicitly[TypeInformation[T]]
} 
Example 10
Source File: ConsumerBuilder.scala    From asura   with MIT License
package asura.kafka

import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.scaladsl.Consumer.Control
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Source
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializer, KafkaAvroDeserializerConfig}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}

import scala.collection.JavaConverters._

object ConsumerBuilder {

  def buildAvroSource[V](
                          brokerUrl: String,
                          schemaRegisterUrl: String,
                          group: String,
                          topics: Set[String],
                          resetType: String = "latest",
                        )(implicit system: ActorSystem): Source[ConsumerRecord[String, V], Control] = {

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegisterUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val consumerSettings: ConsumerSettings[String, V] = {
      val kafkaAvroDeserializer = new KafkaAvroDeserializer()
      kafkaAvroDeserializer.configure(kafkaAvroSerDeConfig.asJava, false)
      val deserializer = kafkaAvroDeserializer.asInstanceOf[Deserializer[V]]

      ConsumerSettings(system, new StringDeserializer, deserializer)
        .withBootstrapServers(brokerUrl)
        .withGroupId(group)
        .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, resetType)
    }
    Consumer.plainSource(consumerSettings, Subscriptions.topics(topics))
  }
} 
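A rough sketch of wiring this source into a running stream follows; the system name, broker and schema-registry URLs, group, topic, and the use of the bare SpecificRecord interface as the type parameter are all placeholders rather than anything from the original project.

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import asura.kafka.ConsumerBuilder
import org.apache.avro.specific.SpecificRecord

object ConsumerBuilderSketch extends App {
  implicit val system: ActorSystem = ActorSystem("avro-consumer")
  implicit val materializer: ActorMaterializer = ActorMaterializer()

  // In real code the type parameter would be a generated Avro class rather than the SpecificRecord interface itself.
  ConsumerBuilder
    .buildAvroSource[SpecificRecord]("localhost:9092", "http://localhost:8081", "example-group", Set("example-topic"))
    .runForeach(record => println(record.value()))
}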
Example 11
Source File: package.scala    From kafka4s   with Apache License 2.0
package com.banno.kafka

import org.scalacheck.{Arbitrary, Cogen, Gen}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

package object test {

  implicit def arbitraryProducerRecord[K: Arbitrary, V: Arbitrary]
      : Arbitrary[ProducerRecord[K, V]] = Arbitrary {
    for {
      t <- Gen.identifier
      k <- Arbitrary.arbitrary[K]
      v <- Arbitrary.arbitrary[V]
    } yield new ProducerRecord(t, k, v)
  }

  implicit def arbitraryConsumerRecord[K: Arbitrary, V: Arbitrary]
      : Arbitrary[ConsumerRecord[K, V]] = Arbitrary {
    for {
      t <- Gen.identifier
      p <- Gen.posNum[Int]
      o <- Gen.posNum[Long]
      k <- Arbitrary.arbitrary[K]
      v <- Arbitrary.arbitrary[V]
    } yield new ConsumerRecord(t, p, o, k, v)
  }

  //these things are necessary for EqSpec
  implicit def producerRecordCogen[K, V]: Cogen[ProducerRecord[K, V]] =
    Cogen(pr => pr.key.toString.length.toLong + pr.value.toString.length.toLong) // ¯\_(ツ)_/¯
  implicit def consumerRecordCogen[K, V]: Cogen[ConsumerRecord[K, V]] =
    Cogen(cr => cr.key.toString.length.toLong + cr.value.toString.length.toLong) // ¯\_(ツ)_/¯
} 
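These Arbitrary instances let ScalaCheck generate records for property tests. A brief, hypothetical property using them (the property and the object name are illustrative, not from kafka4s):

import com.banno.kafka.test._
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.scalacheck.Prop

object ConsumerRecordProps {
  // The generator uses Gen.posNum for offsets, so generated offsets are positive and this property should hold.
  val nonNegativeOffset = Prop.forAll { (record: ConsumerRecord[String, Int]) =>
    record.offset() >= 0L
  }
}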
Example 12
Source File: CurrentOffsetCounter.scala    From kafka4s   with Apache License 2.0
package com.banno.kafka.metrics.prometheus

import cats.effect.Sync
import cats.implicits._
import org.apache.kafka.clients.consumer.ConsumerRecord
import io.prometheus.client._
import scala.math.max

object CurrentOffsetCounter {

  def apply[F[_]](cr: CollectorRegistry, prefix: String, clientId: String)(
      implicit F: Sync[F]
  ): F[ConsumerRecord[_, _] => F[Unit]] =
    F.delay {
        Counter
          .build()
          .name(prefix + "_current_offset")
          .help("Counter for last consumed (not necessarily committed) offset of topic partition.")
          .labelNames("client_id", "topic", "partition")
          .register(cr)
      }
      .map { counter => (record: ConsumerRecord[_, _]) =>
        for {
          value <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).get)
          delta = max(0, record.offset.toDouble - value)
          _ <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).inc(delta))
        } yield ()
      }
} 
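A rough sketch of how the returned callback might be used with cats-effect IO; the registry, prefix, client id and the helper object are placeholders, not part of kafka4s.

import cats.effect.IO
import cats.implicits._
import io.prometheus.client.CollectorRegistry
import org.apache.kafka.clients.consumer.ConsumerRecord
import com.banno.kafka.metrics.prometheus.CurrentOffsetCounter

object CurrentOffsetCounterSketch {
  val registry = new CollectorRegistry()

  // Build the counter once, then apply the returned function to every consumed record.
  def observeAll(records: List[ConsumerRecord[String, String]]): IO[Unit] =
    CurrentOffsetCounter[IO](registry, "example_app", "client-1").flatMap { observe =>
      records.traverse_(observe)
    }
}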
Example 13
Source File: DirectKafkaWordCount.scala    From spark-secure-kafka-app   with Apache License 2.0
package com.cloudera.spark.examples

import org.apache.kafka.clients.consumer.ConsumerRecord

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, LocationStrategies, KafkaUtils}
import org.apache.spark.streaming._

object DirectKafkaWordCount {
    def main(args: Array[String]) {
      if (args.length < 3) {
        System.err.println(s"""
                              |Usage: DirectKafkaWordCount <brokers> <topics> <ssl>
                              |  <brokers> is a list of one or more Kafka brokers
                              |  <topics> is a list of one or more kafka topics to consume from
                              |  <ssl> true if using SSL, false otherwise.
                              |
        """.stripMargin)
        System.exit(1)
      }

      val Array(brokers, topics, ssl) = args

      // Create context with 2 second batch interval
      val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount")
      val ssc = new StreamingContext(sparkConf, Seconds(2))
      val isUsingSsl = ssl.toBoolean

      // Create direct kafka stream with brokers and topics
      val topicsSet = topics.split(",").toSet
      val commonParams = Map[String, Object](
        "bootstrap.servers" -> brokers,
        "security.protocol" -> (if (isUsingSsl) "SASL_SSL" else "SASL_PLAINTEXT"),
        "sasl.kerberos.service.name" -> "kafka",
        "auto.offset.reset" -> "earliest",
        "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer",
        "group.id" -> "default",
        "enable.auto.commit" -> (false: java.lang.Boolean)
      )

      val additionalSslParams = if (isUsingSsl) {
        Map(
          "ssl.truststore.location" -> "/etc/cdep-ssl-conf/CA_STANDARD/truststore.jks",
          "ssl.truststore.password" -> "cloudera"
        )
      } else {
        Map.empty
      }

      val kafkaParams = commonParams ++ additionalSslParams

      val messages: InputDStream[ConsumerRecord[String, String]] =
        KafkaUtils.createDirectStream[String, String](
          ssc,
          LocationStrategies.PreferConsistent,
          ConsumerStrategies.Subscribe[String, String](topicsSet, kafkaParams)
        )

      // Get the lines, split them into words, count the words and print
      val lines = messages.map(_.value())
      val words = lines.flatMap(_.split(" "))
      val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
      wordCounts.print()

      // Start the computation
      ssc.start()
      ssc.awaitTermination()
    }
} 
Example 14
Source File: KafkaUtility.scala    From real-time-stream-processing-engine   with Apache License 2.0
package com.knoldus.streaming.kafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}


object KafkaUtility {

  //TODO It should read from config
  private val kafkaParams = Map(
    "bootstrap.servers" -> "localhost:9092",
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    "auto.offset.reset" -> "earliest",
    "group.id" -> "tweet-consumer"
  )

  private val preferredHosts = LocationStrategies.PreferConsistent


  def createDStreamFromKafka(ssc: StreamingContext, topics: List[String]): InputDStream[ConsumerRecord[String, String]] =
    KafkaUtils.createDirectStream[String, String](
      ssc,
      preferredHosts,
      ConsumerStrategies.Subscribe[String, String](topics.distinct, kafkaParams)
    )

} 
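A short, hypothetical usage of the helper above; the application name, batch interval and topic are placeholders.

import com.knoldus.streaming.kafka.KafkaUtility
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaUtilitySketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("tweet-stream").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(10))
    // Print the payload of each consumed record in every 10-second batch.
    KafkaUtility.createDStreamFromKafka(ssc, List("tweets")).map(_.value()).print()
    ssc.start()
    ssc.awaitTermination()
  }
}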
Example 15
Source File: ProcessingKafkaApplication.scala    From Akka-Cookbook   with MIT License
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource  ~> mapToProducerRecord   ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
} 
Example 16
Source File: Codecs.scala    From embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization._


object Codecs {
  implicit val stringSerializer: Serializer[String] = new StringSerializer()
  implicit val nullSerializer: Serializer[Array[Byte]] =
    new ByteArraySerializer()

  implicit val stringDeserializer: Deserializer[String] =
    new StringDeserializer()
  implicit val nullDeserializer: Deserializer[Array[Byte]] =
    new ByteArrayDeserializer()

  implicit val stringKeyValueCrDecoder
      : ConsumerRecord[String, String] => (String, String) =
    cr => (cr.key(), cr.value)
  implicit val stringValueCrDecoder: ConsumerRecord[String, String] => String =
    _.value()
  implicit val stringKeyValueTopicCrDecoder
      : ConsumerRecord[String, String] => (String, String, String) = cr =>
    (cr.topic(), cr.key(), cr.value())

  implicit val keyNullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, Array[Byte]) =
    cr => (cr.key(), cr.value)
  implicit val nullValueCrDecoder
      : ConsumerRecord[String, Array[Byte]] => Array[Byte] = _.value()
  implicit val keyNullValueTopicCrDecoder
      : ConsumerRecord[String, Array[Byte]] => (String, String, Array[Byte]) =
    cr => (cr.topic(), cr.key(), cr.value())
} 
Example 17
Source File: ConsumerExtensions.scala    From embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.jdk.CollectionConverters._
import scala.concurrent.duration._
import scala.util.Try


    private def getNextBatch[T](poll: FiniteDuration, topics: Seq[String])(
        implicit decoder: ConsumerRecord[K, V] => T
    ): Seq[T] =
      Try {
        consumer.subscribe(topics.asJava)
        topics.foreach(consumer.partitionsFor)
        val records = consumer.poll(duration2JavaDuration(poll))
        // use toList to force eager evaluation. toSeq is lazy
        records.iterator().asScala.toList.map(decoder(_))
      }.recover {
        case ex: KafkaException => throw new KafkaUnavailableException(ex)
      }.get
  }
} 
Example 18
Source File: CommittableRecord.scala    From zio-kafka   with Apache License 2.0
package zio.kafka.consumer

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import zio.{ RIO, Task }
import zio.kafka.serde.Deserializer

final case class CommittableRecord[K, V](record: ConsumerRecord[K, V], offset: Offset) {
  def deserializeWith[R, K1, V1](
    keyDeserializer: Deserializer[R, K1],
    valueDeserializer: Deserializer[R, V1]
  )(implicit ev1: K <:< Array[Byte], ev2: V <:< Array[Byte]): RIO[R, CommittableRecord[K1, V1]] =
    for {
      key   <- keyDeserializer.deserialize(record.topic(), record.headers(), record.key())
      value <- valueDeserializer.deserialize(record.topic(), record.headers(), record.value())
    } yield {
      copy(
        record = new ConsumerRecord[K1, V1](
          record.topic(),
          record.partition(),
          record.offset(),
          record.timestamp(),
          record.timestampType(),
          ConsumerRecord.NULL_CHECKSUM, // Checksum is deprecated
          record.serializedKeySize(),
          record.serializedValueSize(),
          key,
          value,
          record.headers()
        )
      )
    }

  def key: K          = record.key
  def value: V        = record.value()
  def partition: Int  = record.partition()
  def timestamp: Long = record.timestamp()
}

object CommittableRecord {
  def apply[K, V](
    record: ConsumerRecord[K, V],
    commitHandle: Map[TopicPartition, Long] => Task[Unit]
  ): CommittableRecord[K, V] =
    CommittableRecord(
      record,
      OffsetImpl(new TopicPartition(record.topic(), record.partition()), record.offset(), commitHandle)
    )
} 
Example 19
Source File: TopicsEndpoint.scala    From hydra   with Apache License 2.0
package hydra.kafka.endpoints

import akka.actor.ActorSelection
import akka.http.scaladsl.common.EntityStreamingSupport
import akka.kafka.Subscriptions
import akka.kafka.scaladsl.Consumer
import akka.pattern.ask
import akka.util.Timeout
import hydra.core.http.RouteSupport
import hydra.kafka.consumer.KafkaConsumerProxy.{GetLatestOffsets, LatestOffsetsResponse}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition

import scala.collection.immutable.Map
import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext, Future}


class TopicsEndpoint(consumerProxy:ActorSelection)(implicit ec:ExecutionContext) extends RouteSupport {

  import hydra.kafka.util.KafkaUtils._

  implicit val jsonStreamingSupport = EntityStreamingSupport.json()

  override val route =
    path("transports" / "kafka" / "consumer" / "topics" / Segment) {
      topicName =>
        get {
          extractRequestContext { ctx =>
            parameters('format.?, 'group.?, 'n ? 10, 'start ? "earliest") {
              (format, groupId, n, startOffset) =>
                val settings = loadConsumerSettings[Any, Any](
                  format.getOrElse("avro"),
                  groupId.getOrElse("hydra"),
                  startOffset
                )
                val offsets = latestOffsets(topicName)
                val source = Consumer
                  .plainSource(settings, Subscriptions.topics(topicName))
                  .initialTimeout(5.seconds)
                  .zipWithIndex
                  .takeWhile(rec =>
                    rec._2 <= n && !shouldCancel(offsets, rec._1)
                  )
                  .map(rec => rec._1.value().toString)
                  .watchTermination()((_, termination) =>
                    termination.failed.foreach {
                      case cause => ctx.fail(cause)
                    }
                  )
                complete(source)

            }
          }
        }
    }

  def shouldCancel(
      fpartitions: Future[Map[TopicPartition, Long]],
      record: ConsumerRecord[Any, Any]
  ): Boolean = {
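    // Cancel the stream once a record's offset reaches the latest offset captured when the request started.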
    if (fpartitions.isCompleted) {
      val partitions = Await.result(fpartitions, 1.millis)
      val tp = new TopicPartition(record.topic(), record.partition())
      partitions.get(tp) match {
        case Some(offset) => record.offset() >= offset
        case None         => false
      }
    } else {
      false
    }

  }

  private def latestOffsets(
      topic: String
  ): Future[Map[TopicPartition, Long]] = {
    implicit val timeout = Timeout(5 seconds)
    (consumerProxy ? GetLatestOffsets(topic))
      .mapTo[LatestOffsetsResponse]
      .map(_.offsets)
  }

} 
Example 20
Source File: Codecs.scala    From scalatest-embedded-kafka   with MIT License
package net.manub.embeddedkafka.schemaregistry.avro

import org.apache.avro.specific.SpecificRecord
import org.apache.kafka.clients.consumer.ConsumerRecord

object Codecs {

  implicit def stringKeyAvroValueCrDecoder[V <: SpecificRecord]
    : ConsumerRecord[String, V] => (String, V) =
    cr => (cr.key(), cr.value)
  implicit def avroValueCrDecoder[V <: SpecificRecord]
    : ConsumerRecord[String, V] => V =
    _.value()
  implicit def stringKeyAvroValueTopicCrDecoder[V <: SpecificRecord]
    : ConsumerRecord[String, V] => (String, String, V) =
    cr => (cr.topic(), cr.key(), cr.value())

} 
Example 21
Source File: Codecs.scala    From scalatest-embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization._


object Codecs {
  implicit val stringSerializer: Serializer[String] = new StringSerializer()
  implicit val nullSerializer: Serializer[Array[Byte]] =
    new ByteArraySerializer()

  implicit val stringDeserializer: Deserializer[String] =
    new StringDeserializer()
  implicit val nullDeserializer: Deserializer[Array[Byte]] =
    new ByteArrayDeserializer()

  implicit val stringKeyValueCrDecoder
    : ConsumerRecord[String, String] => (String, String) =
    cr => (cr.key(), cr.value)
  implicit val stringValueCrDecoder: ConsumerRecord[String, String] => String =
    _.value()
  implicit val stringKeyValueTopicCrDecoder
    : ConsumerRecord[String, String] => (String, String, String) = cr =>
    (cr.topic(), cr.key(), cr.value())

  implicit val keyNullValueCrDecoder
    : ConsumerRecord[String, Array[Byte]] => (String, Array[Byte]) =
    cr => (cr.key(), cr.value)
  implicit val nullValueCrDecoder
    : ConsumerRecord[String, Array[Byte]] => Array[Byte] = _.value()
  implicit val keyNullValueTopicCrDecoder
    : ConsumerRecord[String, Array[Byte]] => (String, String, Array[Byte]) =
    cr => (cr.topic(), cr.key(), cr.value())
} 
Example 22
Source File: ConsumerExtensions.scala    From scalatest-embedded-kafka   with MIT License
package net.manub.embeddedkafka

import org.apache.kafka.clients.consumer.{ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.KafkaException

import scala.util.Try


    private def getNextBatch[T](poll: Long, topics: Seq[String])(
        implicit decoder: ConsumerRecord[K, V] => T): Seq[T] =
      Try {
        import scala.collection.JavaConverters._
        consumer.subscribe(topics.asJava)
        topics.foreach(consumer.partitionsFor)
        val records = consumer.poll(java.time.Duration.ofMillis(poll))
        // use toList to force eager evaluation. toSeq is lazy
        records.iterator().asScala.toList.map(decoder(_))
      }.recover {
        case ex: KafkaException => throw new KafkaUnavailableException(ex)
      }.get
  }

} 
Example 23
Source File: CachedKafkaConsumer.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

import org.apache.spark.{SparkEnv, SparkException, TaskContext}
import org.apache.spark.internal.Logging



  def getOrCreate(
      topic: String,
      partition: Int,
      kafkaParams: ju.Map[String, Object]): CachedKafkaConsumer = synchronized {
    val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
    val topicPartition = new TopicPartition(topic, partition)
    val key = CacheKey(groupId, topicPartition)

    // If this is reattempt at running the task, then invalidate cache and start with
    // a new consumer
    if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) {
      cache.remove(key)
      new CachedKafkaConsumer(topicPartition, kafkaParams)
    } else {
      if (!cache.containsKey(key)) {
        cache.put(key, new CachedKafkaConsumer(topicPartition, kafkaParams))
      }
      cache.get(key)
    }
  }
}