org.apache.kafka.clients.consumer.ConsumerConfig Scala Examples

The following examples show how to use org.apache.kafka.clients.consumer.ConsumerConfig. Each example is taken from an open-source project; the source file, project name, and license are noted above it.
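Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share: using the ConsumerConfig key constants instead of raw property strings when configuring a plain consumer. The broker address, group id, and topic below are placeholder values, not taken from any of the projects.

import java.time.Duration
import java.util.Properties

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._

object ConsumerConfigSketch extends App {
  val props = new Properties()
  // ConsumerConfig exposes the property keys as constants, avoiding typos in raw strings
  props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // placeholder broker
  props.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group")           // placeholder group id
  props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
  props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
  props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)
  props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer].getName)

  val consumer = new KafkaConsumer[String, String](props)
  consumer.subscribe(List("example-topic").asJava) // placeholder topic
  try {
    consumer.poll(Duration.ofMillis(500)).asScala.foreach { record =>
      println(s"${record.key} -> ${record.value} @ offset ${record.offset}")
    }
  } finally consumer.close()
}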
Example 1
Source File: IntegrationTest.scala    From kmq   with Apache License 2.0
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery", Duration.ofSeconds(1).toMillis,
    1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic, MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic,msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
} 
Example 2
Source File: KafkaTestClient.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.integration.clients

import java.util.Properties

import com.expedia.www.haystack.trace.indexer.config.entities.KafkaConfiguration
import com.expedia.www.haystack.trace.indexer.integration.serdes.{SnappyCompressedSpanBufferProtoDeserializer, SpanProtoSerializer}
import com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringDeserializer, StringSerializer}
import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster

object KafkaTestClient {
  val KAFKA_CLUSTER = new EmbeddedKafkaCluster(1)
  KAFKA_CLUSTER.start()
}

class KafkaTestClient {
  import KafkaTestClient._

  val INPUT_TOPIC = "spans"
  val OUTPUT_TOPIC = "span-buffer"

  val APP_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[ByteArraySerializer])
    props
  }

  val APP_CONSUMER_CONFIG: Properties = new Properties()

  val TEST_PRODUCER_CONFIG: Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    props.put(ProducerConfig.ACKS_CONFIG, "1")
    props.put(ProducerConfig.BATCH_SIZE_CONFIG, "20")
    props.put(ProducerConfig.RETRIES_CONFIG, "0")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[SpanProtoSerializer])
    props
  }

  val RESULT_CONSUMER_CONFIG = new Properties()

  def buildConfig = KafkaConfiguration(numStreamThreads = 1,
    pollTimeoutMs = 100,
    APP_CONSUMER_CONFIG, APP_PRODUCER_CONFIG, OUTPUT_TOPIC, INPUT_TOPIC,
    consumerCloseTimeoutInMillis = 3000,
    commitOffsetRetries = 3,
    commitBackoffInMillis = 250,
    maxWakeups = 5, wakeupTimeoutInMillis = 3000)

  def prepare(appId: String): Unit = {
    APP_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    APP_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-app-consumer")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    APP_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SpanDeserializer])
    APP_CONSUMER_CONFIG.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")

    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, KAFKA_CLUSTER.bootstrapServers)
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.GROUP_ID_CONFIG, appId + "-result-consumer")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
    RESULT_CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[SnappyCompressedSpanBufferProtoDeserializer])

    deleteTopics(INPUT_TOPIC, OUTPUT_TOPIC)
    KAFKA_CLUSTER.createTopic(INPUT_TOPIC, 2, 1)
    KAFKA_CLUSTER.createTopic(OUTPUT_TOPIC)
  }

  private def deleteTopics(topics: String*): Unit = KAFKA_CLUSTER.deleteTopicsAndWait(topics:_*)
} 
Example 3
Source File: Consumers.scala    From Fast-Data-Processing-Systems-with-SMACK-Stack   with MIT License
import akka.actor.ActorRef
import akka.kafka._
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl._
import akka.stream.scaladsl.{Keep, Sink}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
// The book's placeholders (system, db, rocket, business, producerSettings, maxPartitions) are assumed to be defined elsewhere.

val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
  .withBootstrapServers("localhost:9092")
  .withGroupId("group1")
  .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

// Consume messages and store a representation, including offset, in DB example:
db.loadOffset().foreach { fromOffset =>
  val subscription = Subscriptions.assignmentWithOffset(new TopicPartition("topic1", 1) -> fromOffset)
  Consumer.plainSource(consumerSettings, subscription)
    .mapAsync(1)(db.save)
}

// Consume messages at-most-once example:
Consumer.atMostOnceSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { record =>
    rocket.launch(record.value)
  }

// Consume messages at-least-once example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .mapAsync(1) { msg =>
    db.update(msg.value).flatMap(_ =>
      msg.committableOffset.commitScaladsl())
  }

// Connect a Consumer to Producer example: 
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
  .map(msg => ProducerMessage.Message(
    new ProducerRecord[Array[Byte], String]("topic2", msg.value),
    msg.committableOffset))
  .to(Producer.commitableSink(producerSettings))

// Consume messages at-least-once, and commit in batches example:
Consumer.committableSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))
    .mapAsync(1) { msg =>
      db.update(msg.value).map(_ => msg.committableOffset)
    }
    .batch(max = 10, first => 
        CommittableOffsetBatch.empty.updated(first)) { (batch, elem) =>
          batch.updated(elem)
    }.mapAsync(1)(_.commitScaladsl())

// A reusable Kafka consumer example:
val consumer: ActorRef = system.actorOf(KafkaConsumerActor.props(consumerSettings))

// Manually assign topic partition to it
val stream1 = Consumer
    .plainExternalSource[Array[Byte], String](consumer, Subscriptions.assignment(new TopicPartition("topic1", 1)))
    .via(business)
    .to(Sink.ignore)

// Manually assign another topic partition
val stream2 = Consumer
    .plainExternalSource[Array[Byte], String](consumer, Subscriptions.assignment(new TopicPartition("topic1", 2)))
    .via(business)
    .to(Sink.ignore)

// Consumer group example:
val consumerGroup = Consumer.committablePartitionedSource(consumerSettings.withClientId("client1"), Subscriptions.topics("topic1"))

// Process each assigned partition separately
  consumerGroup.map {
    case (topicPartition, source) =>
      source
        .via(business)
        .toMat(Sink.ignore)(Keep.both)
        .run()
  }.mapAsyncUnordered(maxPartitions)(_._2) 
Example 4
Source File: KafkaConsumerActor.scala    From coral   with Apache License 2.0
package io.coral.actors.connector

import java.util.Properties

import akka.actor.Props
import io.coral.actors.CoralActor
import io.coral.actors.connector.KafkaConsumerActor.{StopReadingMessageQueue, ReadMessageQueue}
import io.coral.lib.{ConfigurationBuilder, KafkaJsonConsumer}
import kafka.serializer.Decoder
import kafka.tools.MessageFormatter
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.json4s.JsonAST.{JNothing, JObject, JValue}

object KafkaConsumerActor {
	case class ReadMessageQueue()
	case class StopReadingMessageQueue()

	implicit val formats = org.json4s.DefaultFormats
	val builder = new ConfigurationBuilder("kafka.consumer")

	def getParams(json: JValue) = {
		for {
			kafka <- (json \ "params" \ "kafka").extractOpt[JObject]
			topic <- (json \ "params" \ "topic").extractOpt[String]
		} yield {
			val properties = consumerProperties(kafka)
			(properties, topic)
		}
	}

	def consumerProperties(json: JObject): Properties = {
		val properties = builder.properties

		json.values.foreach {
			case (k: String, v: String) =>
				properties.setProperty(k, v)
		}

		properties
	}

	def apply(json: JValue): Option[Props] = {
		getParams(json).map(_ => Props(classOf[KafkaConsumerActor], json, KafkaJsonConsumer()))
	}

	def apply(json: JValue, decoder: Decoder[JValue]): Option[Props] = {
		getParams(json).map(_ => Props(classOf[KafkaConsumerActor], json, KafkaJsonConsumer(decoder)))
	}
}

class KafkaConsumerActor(json: JObject, connection: KafkaJsonConsumer) extends CoralActor(json) {
	val (properties, topic) = KafkaConsumerActor.getParams(json).get
	lazy val stream = connection.stream(topic, properties)
	var shouldStop = false

	override def preStart(): Unit = {
		super.preStart()
	}

	override def receiveExtra: Receive = {
		case ReadMessageQueue() if stream.hasNextInTime =>
			val message: JValue = stream.next
			stream.commitOffsets

			if (message != JNothing) {
				emit(message)
			}

			if (!shouldStop) {
				self ! ReadMessageQueue()
			}
		case ReadMessageQueue() =>
			self ! ReadMessageQueue()
		case StopReadingMessageQueue() =>
			shouldStop = true
	}

	
} 
Example 5
Source File: MessageListener.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.client

import java.time.Duration
import java.util.Properties

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.ByteArrayDeserializer


class MessageListener[K, V](
                             brokers: String,
                             topic: String,
                             group: String,
                             keyDeserealizer: String,
                             valueDeserealizer: String,
                             processor: RecordProcessorTrait[K, V]) extends Runnable {

  import MessageListener._

  import scala.collection.JavaConverters._

  val consumer = new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserealizer, valueDeserealizer))
  consumer.subscribe(Seq(topic).asJava)
  var completed = false

  def complete(): Unit = {
    completed = true
  }

  override def run(): Unit = {
    while (!completed) {
      val records = consumer.poll(Duration.ofMillis(100)).asScala
      for (record <- records) {
        processor.processRecord(record)
      }
    }
    consumer.close()
    System.out.println("Listener completes")
  }

  def start(): Unit = {
    val t = new Thread(this)
    t.start()
  }
} 
Example 6
Source File: TFServingModelServer.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.tensorflowserving

import akka.actor.typed.scaladsl.Behaviors
import akka.actor.typed.scaladsl.adapter._
import akka.actor.typed.{ActorRef, ActorSystem}
import akka.http.scaladsl.Http
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Sink
import akka.stream.typed.scaladsl.{ActorFlow, ActorMaterializer}
import akka.util.Timeout
import com.lightbend.modelserving.configuration.ModelServingConfiguration
import com.lightbend.modelserving.model.ServingResult
import com.lightbend.modelserving.winemodel.DataRecord
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.ByteArrayDeserializer

import scala.concurrent.duration._
import scala.util.Success


object TFServingModelServer {

  import ModelServingConfiguration._

  // Initialization

  implicit val modelServer = ActorSystem(
    Behaviors.setup[TFModelServerActor](
      context => new TFModelServerBehaviour(context)), "ModelServing")

  implicit val materializer = ActorMaterializer()
  implicit val executionContext = modelServer.executionContext
  implicit val askTimeout = Timeout(30.seconds)

  // Configuration properties for the Kafka topic.
  val dataSettings = ConsumerSettings(modelServer.toUntyped, new ByteArrayDeserializer, new ByteArrayDeserializer)
    .withBootstrapServers(KAFKA_BROKER)
    .withGroupId(DATA_GROUP)
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  def main(args: Array[String]): Unit = {

    println(s"Akka application that uses TensorFlow Serving, brokers $KAFKA_BROKER")

    // Data stream processing
    Consumer.atMostOnceSource(dataSettings, Subscriptions.topics(DATA_TOPIC))
      .map(record => DataRecord.wineFromByteArray(record.value)).collect { case Success(a) => a }
      .via(ActorFlow.ask(1)(modelServer)((elem, replyTo : ActorRef[Option[ServingResult[Double]]]) => new ServeData(replyTo, elem)))
      .collect{ case Some(result) => result}
      .runWith(Sink.foreach(result =>
        println(s"Model served in ${System.currentTimeMillis() - result.submissionTs} ms, with result ${result.result} " +
          s"(model ${result.name}, data type ${result.dataType})")))
    // Rest Server
    startRest(modelServer)
  }

  def startRest(modelServerManager: ActorSystem[TFModelServerActor]): Unit = {

    implicit val timeout = Timeout(10.seconds)
    implicit val system = modelServerManager.toUntyped

    val host = "0.0.0.0"
    val port = MODELSERVING_PORT
    val routes = TFQueriesAkkaHttpResource.storeRoutes(modelServerManager)(modelServerManager.scheduler)

    val _ = Http().bindAndHandle(routes, host, port) map
      { binding =>
        println(s"Starting models observer on port ${binding.localAddress}") } recover {
      case ex =>
        println(s"Models observer could not bind to $host:$port - ${ex.getMessage}")
    }
  }
} 
Example 7
Source File: KafkaSupport.scala    From model-serving-tutorial   with Apache License 2.0
package com.lightbend.modelserving.spark

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.ByteArrayDeserializer



@SerialVersionUID(102L)
object KafkaSupport extends Serializable {

  // Kafka consumer properties
  private val sessionTimeout: Int = 10 * 1000
  private val connectionTimeout: Int = 8 * 1000
  private val AUTOCOMMITINTERVAL: String = "1000" // Frequency of offset commits
  private val SESSIONTIMEOUT: String = "30000" // The timeout used to detect failures - should be greater than processing time
  private val MAXPOLLRECORDS: String = "10" // Max number of records consumed in a single poll
  private val GROUPID: String = "Spark Streaming" // Consumer ID

  def getKafkaConsumerConfig(brokers: String): Map[String, String] = {
    Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
      ConsumerConfig.GROUP_ID_CONFIG -> GROUPID,
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true",
      ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> AUTOCOMMITINTERVAL,
      ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG -> SESSIONTIMEOUT,
      ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> MAXPOLLRECORDS,
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[ByteArrayDeserializer].getTypeName,
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[ByteArrayDeserializer].getTypeName)
  }
} 
Example 8
Source File: MessageListener.scala    From kafka-with-akka-streams-kafka-streams-tutorial   with Apache License 2.0
package com.lightbend.scala.kafka

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.serialization.ByteArrayDeserializer

import scala.collection.JavaConverters._


object MessageListener {
  private val AUTOCOMMITINTERVAL = "1000" // Frequency of offset commits
  private val SESSIONTIMEOUT = "30000"    // The timeout used to detect failures - should be greater than processing time
  private val MAXPOLLRECORDS = "10"       // Max number of records consumed in a single poll

  def consumerProperties(brokers: String, group: String, keyDeserealizer: String, valueDeserealizer: String): Map[String, Object] = {
    Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
      ConsumerConfig.GROUP_ID_CONFIG -> group,
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "true",
      ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG -> AUTOCOMMITINTERVAL,
      ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG -> SESSIONTIMEOUT,
      ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> MAXPOLLRECORDS,
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> keyDeserealizer,
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> valueDeserealizer
    )
  }

  def apply[K, V](brokers: String, topic: String, group: String,
                  processor: RecordProcessorTrait[K, V]): MessageListener[K, V] =
    new MessageListener[K, V](brokers, topic, group, classOf[ByteArrayDeserializer].getName, classOf[ByteArrayDeserializer].getName, processor)
}

class MessageListener[K, V](brokers: String, topic: String, group: String, keyDeserealizer: String, valueDeserealizer: String,
                            processor: RecordProcessorTrait[K, V]) extends Runnable {

  import MessageListener._

  val consumer = new KafkaConsumer[K, V](consumerProperties(brokers, group, keyDeserealizer, valueDeserealizer).asJava)
  consumer.subscribe(Seq(topic).asJava)
  var completed = false

  def complete(): Unit = {
    completed = true
  }

  override def run(): Unit = {
    while (!completed) {
      val records = consumer.poll(100)
      for (record <- records.asScala) {
        processor.processRecord(record)
      }
    }
    consumer.close()
    System.out.println("Listener completes")
  }

  def start(): Unit = {
    val t = new Thread(this)
    t.start()
  }
} 
Example 9
Source File: package.scala    From kafka-scala-api   with Apache License 2.0
package com

import akka.actor.ActorSystem
import akka.kafka.{ConsumerSettings, ProducerSettings}
import akka.stream.ActorMaterializer
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

package object example {
  implicit val system = ActorSystem("FlowProducerMain")
  implicit val materializer = ActorMaterializer()

  val producerSettings = ProducerSettings(system, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers("localhost:9092")

  val topic = "sample_topic"
  val topic1 = "topic1"
  val topic2 = "topic2"


  val consumerSettings =
    ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
      .withBootstrapServers("localhost:9092")
      .withGroupId("group1")
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")
} 
Example 10
Source File: Application.scala    From kafka-serde-scala   with Apache License 2.0
package io.github.azhur.kafkaserdescala.example
import java.util.Properties

import io.github.azhur.kafkaserdecirce.CirceSupport
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.streams.{ KafkaStreams, StreamsConfig, Topology }
import org.apache.kafka.streams.scala.StreamsBuilder

object Application extends App with CirceSupport {
  import io.circe.generic.auto._
  import org.apache.kafka.streams.scala.Serdes._
  import org.apache.kafka.streams.scala.ImplicitConversions._

  case class User(id: Long, name: String, age: Int)

  val topology = buildTopology("input_users", "output_users")

  val streamingApp = new KafkaStreams(topology, streamProperties())
  streamingApp.start()

  sys.addShutdownHook({
    streamingApp.close()
  })

  def buildTopology(inputTopic: String, outputTopic: String): Topology = {
    val streamsBuilder = new StreamsBuilder()
    streamsBuilder
      .stream[String, User](inputTopic)
      .filter((_, user) => user.age > 18)
      .to(outputTopic)

    streamsBuilder.build()
  }

  def streamProperties(): Properties = {
    val streamsConfiguration = new Properties
    streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app")
    streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,
                             Topology.AutoOffsetReset.EARLIEST.toString.toLowerCase)
    streamsConfiguration
  }
} 
Example 11
Source File: CountIntByStreaming.scala    From wow-spark   with MIT License
package com.sev7e0.wow.kafka

import com.sev7e0.wow.spark_streaming.StreamingLogger
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object CountIntByStreaming {

  val brokerList = "localhost:9092"
  val topic = "randomCount"
  val groupId = "group"
  val path = "temp/checkpoint/CountIntBySS"
  val master = "local"

  def main(args: Array[String]): Unit = {
    val prop = initProperties()
    val topics = Array(topic)

    // Set the logging level
    StreamingLogger.setLoggerLevel()

    val sparkConf = new SparkConf()
      .setAppName(CountIntByStreaming.getClass.getName)
      .setMaster(master)

    // Instantiate the StreamingContext with a 2-second batch interval
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Set the checkpoint path
    ssc.checkpoint(path)


    // Create a direct DStream with KafkaUtils
    val kafkaDS = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, prop))


    kafkaDS.map(record => {
      val value = record.value().toLong
      value
    }).reduce(_ + _).print()

    // Start the streaming computation and wait for it to terminate
    ssc.start()
    ssc.awaitTermination()
  }

  def initProperties(): Map[String, Object] = Map[String, Object](
    ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
    ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList,
    ConsumerConfig.GROUP_ID_CONFIG -> groupId,
    ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest",
    ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
  )
} 
Example 12
Source File: TestAvroConsumer.scala    From asura   with MIT License
package asura.kafka.consumer

import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.{Keep, Sink}
import asura.kafka.avro.SampleAvroClass
import com.typesafe.scalalogging.StrictLogging
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializer, KafkaAvroDeserializerConfig}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization._

import scala.collection.JavaConverters._

object TestAvroConsumer extends StrictLogging {

  def main(args: Array[String]): Unit = {

    implicit val system = ActorSystem("consumer")
    implicit val materializer = ActorMaterializer()
    implicit val ec = system.dispatcher

    val schemaRegistryUrl = ""
    val bootstrapServers = ""
    val topic = ""
    val group = ""

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegistryUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val consumerSettings: ConsumerSettings[String, SampleAvroClass] = {
      val kafkaAvroDeserializer = new KafkaAvroDeserializer()
      kafkaAvroDeserializer.configure(kafkaAvroSerDeConfig.asJava, false)
      val deserializer = kafkaAvroDeserializer.asInstanceOf[Deserializer[SampleAvroClass]]

      ConsumerSettings(system, new StringDeserializer, deserializer)
        .withBootstrapServers(bootstrapServers)
        .withGroupId(group)
        .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
    }

    val samples = (1 to 3)
    val (control, result) = Consumer
      .plainSource(consumerSettings, Subscriptions.topics(topic))
      .take(samples.size.toLong)
      .map(_.value())
      .toMat(Sink.seq)(Keep.both)
      .run()

    control.shutdown()
    result.map(records => records.foreach(record => logger.info(s"${record}")))
  }
} 
Example 13
Source File: ConsumerBuilder.scala    From asura   with MIT License
package asura.kafka

import akka.actor.ActorSystem
import akka.kafka.scaladsl.Consumer
import akka.kafka.scaladsl.Consumer.Control
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Source
import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroDeserializer, KafkaAvroDeserializerConfig}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.{Deserializer, StringDeserializer}

import scala.collection.JavaConverters._

object ConsumerBuilder {

  def buildAvroSource[V](
                          brokerUrl: String,
                          schemaRegisterUrl: String,
                          group: String,
                          topics: Set[String],
                          resetType: String = "latest",
                        )(implicit system: ActorSystem): Source[ConsumerRecord[String, V], Control] = {

    val kafkaAvroSerDeConfig = Map[String, Any](
      AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG -> schemaRegisterUrl,
      KafkaAvroDeserializerConfig.SPECIFIC_AVRO_READER_CONFIG -> true.toString
    )
    val consumerSettings: ConsumerSettings[String, V] = {
      val kafkaAvroDeserializer = new KafkaAvroDeserializer()
      kafkaAvroDeserializer.configure(kafkaAvroSerDeConfig.asJava, false)
      val deserializer = kafkaAvroDeserializer.asInstanceOf[Deserializer[V]]

      ConsumerSettings(system, new StringDeserializer, deserializer)
        .withBootstrapServers(brokerUrl)
        .withGroupId(group)
        .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, resetType)
    }
    Consumer.plainSource(consumerSettings, Subscriptions.topics(topics))
  }
} 
Example 14
Source File: KafkaDistributedProcessingTest.scala    From aecor   with MIT License
package aecor.kafkadistributedprocessing

import cats.effect.concurrent.{ Deferred, Ref }
import cats.effect.{ ExitCase, IO }
import cats.implicits._
import fs2.concurrent.Queue
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.scalatest.funsuite.AnyFunSuiteLike
import scala.concurrent.duration._
class KafkaDistributedProcessingTest extends AnyFunSuiteLike with KafkaSupport with IOSupport {

  val topicName = "process-distribution"

  createCustomTopic(topicName, partitions = 4)

  val settings =
    DistributedProcessingSettings(Set(s"localhost:${kafkaConfig.kafkaPort}"), topicName)

  test("Process error propagation") {
    val exception = new RuntimeException("Oops!")

    val result = DistributedProcessing(settings)
      .start("Process error propagation", List(IO.raiseError[Unit](exception)))
      .attempt
      .timeout(20.seconds)
      .unsafeRunSync()

    assert(result == Left(exception))
  }

  test("Process lifecycle") {

    val test = Ref.of[IO, (Boolean, Boolean)]((false, false)).flatMap { ref =>
      Deferred[IO, Unit]
        .flatMap { done =>
          val process =
            ref.set((true, false)) >>
              done.complete(()) >>
              IO.never.guaranteeCase {
                case ExitCase.Canceled => ref.set((true, true))
                case _                 => IO.unit
              }.void

          val run = DistributedProcessing(settings)
            .start("Process lifecycle", List(process))

          IO.race(run, done.get) >> ref.get
        }
    }

    val (started, finished) = test.timeout(20.seconds).unsafeRunSync()

    assert(started)
    assert(finished)
  }

  test("Process distribution") {
    val test = Queue.unbounded[IO, Int].flatMap { queue =>
      def run(client: Int) =
        DistributedProcessing(
          settings.withConsumerSetting(ConsumerConfig.CLIENT_ID_CONFIG, client.toString)
        ).start(
          "Process distribution",
          Stream
            .from(0)
            .take(8)
            .map { n =>
              val idx = client * 10 + n
              (queue.enqueue1(idx) >> IO.cancelBoundary <* IO.never)
                .guarantee(queue.enqueue1(-idx))
            }
            .toList
        )

      def dequeue(size: Long): IO[List[Int]] =
        queue.dequeue.take(size).compile.to[List]

      for {
        d1 <- run(1).start
        s1 <- dequeue(8)
        d2 <- run(2).start
        s2 <- dequeue(16)
        _ <- d1.cancel
        s3 <- dequeue(16)
        _ <- d2.cancel
        s4 <- dequeue(8)
      } yield (s1, s2, s3, s4)
    }

    val (s1, s2, s3, s4) = test.timeout(20.seconds).unsafeRunSync()

    assert(s1.toSet == Set(10, 11, 12, 13, 14, 15, 16, 17))
    assert((s1 ++ s2 ++ s3 ++ s4).sum == 0)
  }

} 
Example 15
Source File: KafkaTest.scala    From aecor   with MIT License
package aecor.kafkadistributedprocessing

import java.util.Properties

import aecor.kafkadistributedprocessing.internal.Kafka.UnitDeserializer
import aecor.kafkadistributedprocessing.internal.RebalanceEvents.RebalanceEvent
import aecor.kafkadistributedprocessing.internal.RebalanceEvents.RebalanceEvent.{
  PartitionsAssigned,
  PartitionsRevoked
}
import aecor.kafkadistributedprocessing.internal.{ Kafka, KafkaConsumer }
import cats.effect.IO
import cats.implicits._
import fs2.Stream
import fs2.concurrent.Queue
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.scalatest.funsuite.AnyFunSuite

import scala.concurrent.duration._

class KafkaTest extends AnyFunSuite with IOSupport with KafkaSupport {
  val topic = "test"
  val partitionCount = 4

  createCustomTopic(topic, partitions = partitionCount)

  val createConsumerAccess = {
    val properties = new Properties()
    properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers.mkString(","))
    properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "test")
    KafkaConsumer.create[IO](properties, new UnitDeserializer, new UnitDeserializer)
  }

  val watchRebalanceEvents =
    Stream
      .resource(createConsumerAccess)
      .flatMap(Kafka.watchRebalanceEvents(_, topic, 500.millis, 50.millis))

  test("Rebalance event stream") {

    val program = for {
      queue <- Queue.unbounded[IO, (Int, RebalanceEvent)]

      run = (n: Int) =>
        watchRebalanceEvents
          .evalMap { x =>
            val e = n -> x.value
            queue.enqueue1(e) >> x.commit
          }
          .compile
          .drain
          .start

      p1 <- run(1)

      l1 <- queue.dequeue.take(2).compile.toList

      p2 <- run(2)

      l2 <- queue.dequeue.take(4).compile.toList

      _ <- p1.cancel

      l3 <- queue.dequeue.take(2).compile.toList

      _ <- p2.cancel

    } yield (l1, l2, l3)

    val (l1, l2, l3) = program.unsafeRunTimed(40.seconds).get

    def fold(list: List[(Int, RebalanceEvent)]): Map[Int, Set[Int]] =
      list.foldLeft(Map.empty[Int, Set[Int]]) {
        case (s, (c, e)) =>
          e match {
            case PartitionsRevoked(partitions) =>
              s.updated(c, s.getOrElse(c, Set.empty[Int]) -- partitions.map(_.partition()))
            case PartitionsAssigned(partitions) =>
              s.updated(c, s.getOrElse(c, Set.empty[Int]) ++ partitions.map(_.partition()))
          }
      }

    assert(fold(l1) == Map(1 -> Set(1, 0, 3, 2)))
    assert(fold(l2) == Map(1 -> Set(1, 0), 2 -> Set(2, 3)))
    assert(fold(l3) == Map(2 -> Set(1, 0, 3, 2)))

  }

  test("Topic partitions query works before subscription") {
    val program = createConsumerAccess.use(_.partitionsFor(topic))
    val result = program.unsafeRunTimed(2.seconds).get
    assert(result.size == partitionCount)
  }

} 
Example 16
Source File: DistributedProcessing.scala    From aecor   with MIT License
package aecor.kafkadistributedprocessing

import java.util.Properties

import aecor.kafkadistributedprocessing.internal.Kafka
import aecor.kafkadistributedprocessing.internal.Kafka._
import cats.effect.{ ConcurrentEffect, ContextShift, Timer }
import cats.implicits._
import cats.effect.implicits._
import fs2.Stream
import org.apache.kafka.clients.consumer.ConsumerConfig

import scala.concurrent.duration._

final class DistributedProcessing(settings: DistributedProcessingSettings) {

  private def assignRange(size: Int, partitionCount: Int, partition: Int): Option[(Int, Int)] = {
    val even = size / partitionCount
    val reminder = size % partitionCount
    if (partition >= partitionCount) {
      none
    } else {
      if (partition < reminder) {
        (partition * (even + 1), even + 1).some
      } else if (even > 0) {
        (reminder + partition * even, even).some
      } else none
    }
  }

  
  def start[F[_]: ConcurrentEffect: Timer: ContextShift](name: String,
                                                         processes: List[F[Unit]]): F[Unit] =
    Kafka
      .assignPartitions(
        settings.asProperties(name),
        settings.topicName,
        settings.pollingInterval,
        settings.pollTimeout
      )
      .parEvalMapUnordered(Int.MaxValue) {
        case AssignedPartition(partition, partitionCount, watchRevocation, release) =>
          assignRange(processes.size, partitionCount, partition).fold(release) {
            case (offset, processCount) =>
              Stream
                .range[F](offset, offset + processCount)
                .parEvalMapUnordered(processCount)(processes)
                .compile
                .drain
                .race(watchRevocation)
                .flatMap {
                  case Left(_)         => release
                  case Right(callback) => callback
                }
          }
      }
      .compile
      .drain
}

object DistributedProcessing {
  def apply(settings: DistributedProcessingSettings): DistributedProcessing =
    new DistributedProcessing(settings)
}

final case class DistributedProcessingSettings(brokers: Set[String],
                                               topicName: String,
                                               pollingInterval: FiniteDuration = 500.millis,
                                               pollTimeout: FiniteDuration = 50.millis,
                                               consumerSettings: Map[String, String] = Map.empty) {
  def withClientId(clientId: String): DistributedProcessingSettings =
    withConsumerSetting(ConsumerConfig.CLIENT_ID_CONFIG, clientId)

  def clientId: Option[String] = consumerSettings.get(ConsumerConfig.CLIENT_ID_CONFIG)

  def withConsumerSetting(key: String, value: String): DistributedProcessingSettings =
    copy(consumerSettings = consumerSettings.updated(key, value))

  def asProperties(groupId: String): Properties = {
    val properties = new Properties()
    consumerSettings.foreach {
      case (key, value) => properties.setProperty(key, value)
    }
    properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers.mkString(","))
    properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId)
    properties
  }

} 
Example 17
Source File: ConfigurationLoaderSpec.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.unit

import com.expedia.www.haystack.trace.commons.packer.PackerType
import com.expedia.www.haystack.trace.indexer.config.ProjectConfiguration
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig
import org.scalatest.{FunSpec, Matchers}

class ConfigurationLoaderSpec extends FunSpec with Matchers {

  val project = new ProjectConfiguration()
  describe("Configuration loader") {

    it("should load the health status config from base.conf") {
      project.healthStatusFilePath shouldEqual "/app/isHealthy"
    }

    it("should load the span buffer config only from base.conf") {
      val config = project.spanAccumulateConfig
      config.pollIntervalMillis shouldBe 2000L
      config.maxEntriesAllStores shouldBe 20000
      config.bufferingWindowMillis shouldBe 10000L
      config.packerType shouldEqual PackerType.SNAPPY
    }

    it("should load the kafka config from base.conf and one stream property from env variable") {
      val kafkaConfig = project.kafkaConfig
      kafkaConfig.produceTopic shouldBe "span-buffer"
      kafkaConfig.consumeTopic shouldBe "spans"
      kafkaConfig.numStreamThreads shouldBe 2
      kafkaConfig.commitOffsetRetries shouldBe 3
      kafkaConfig.commitBackoffInMillis shouldBe 200

      kafkaConfig.maxWakeups shouldBe 5
      kafkaConfig.wakeupTimeoutInMillis shouldBe 5000

      kafkaConfig.consumerProps.getProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG) shouldBe "earliest"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG) shouldBe "haystack-trace-indexer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG) shouldBe "false"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringDeserializer"
      kafkaConfig.consumerProps.getProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG) shouldBe "com.expedia.www.haystack.trace.indexer.serde.SpanDeserializer"

      kafkaConfig.consumerCloseTimeoutInMillis shouldBe 30000

      kafkaConfig.producerProps.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG) shouldBe "kafkasvc:9092"
      kafkaConfig.producerProps.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.ByteArraySerializer"
      kafkaConfig.producerProps.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG) shouldBe "org.apache.kafka.common.serialization.StringSerializer"
    }

    it("should load the service metadata config from base.conf") {
      val config = project.serviceMetadataWriteConfig
      config.flushIntervalInSec shouldBe 60
      config.flushOnMaxOperationCount shouldBe 10000
      config.esEndpoint shouldBe "http://elasticsearch:9200"
      config.maxInFlightBulkRequests shouldBe 10
      config.maxDocsInBulk shouldBe 100
      config.maxBulkDocSizeInBytes shouldBe 1000000
      config.indexTemplateJson shouldBe Some("some_template_json")
      config.consistencyLevel shouldBe "one"
      config.readTimeoutMillis shouldBe 5000
      config.connectionTimeoutMillis shouldBe 10000
      config.indexName shouldBe "service-metadata"
      config.indexType shouldBe "metadata"
      config.retryConfig.maxRetries shouldBe 10
      config.retryConfig.backOffInMillis shouldBe 100
      config.retryConfig.backoffFactor shouldBe 2
    }

    it("should load the trace backend config from base.conf and few properties overridden from env variable") {
      val backendConfiguration = project.backendConfig

      backendConfiguration.maxInFlightRequests shouldBe 100
    }

    it("should load the elastic search config from base.conf and one property overridden from env variable") {
      val elastic = project.elasticSearchConfig
      elastic.endpoint shouldBe "http://elasticsearch:9200"
      elastic.maxInFlightBulkRequests shouldBe 10
      elastic.maxDocsInBulk shouldBe 100
      elastic.maxBulkDocSizeInBytes shouldBe 1000000
      elastic.indexTemplateJson shouldBe Some("some_template_json")
      elastic.consistencyLevel shouldBe "one"
      elastic.readTimeoutMillis shouldBe 5000
      elastic.connectionTimeoutMillis shouldBe 10000
      elastic.indexNamePrefix shouldBe "haystack-test"
      elastic.indexType shouldBe "spans"
      elastic.retryConfig.maxRetries shouldBe 10
      elastic.retryConfig.backOffInMillis shouldBe 1000
      elastic.retryConfig.backoffFactor shouldBe 2
      elastic.indexHourBucket shouldBe 6
      elastic.maxConnectionsPerRoute shouldBe 10

      elastic.awsRequestSigningConfiguration.enabled shouldEqual false
      elastic.awsRequestSigningConfiguration.region shouldEqual "us-west-2"
      elastic.awsRequestSigningConfiguration.awsServiceName shouldEqual "es"
      elastic.awsRequestSigningConfiguration.accessKey shouldBe None
      elastic.awsRequestSigningConfiguration.secretKey shouldBe None
    }
  }
} 
Example 18
Source File: BasicConsumerExample.scala    From kafka_training   with Apache License 2.0
package com.malaska.kafka.training

import java.util
import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition


object BasicConsumerExample {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")

    println("Creating Consumer")
    val consumer = new KafkaConsumer[String,String](props)

    val listener = new RebalanceListener

    consumer.subscribe(Collections.singletonList(topic), listener)


    println("Starting Consumer")
    while (true) {
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        val record = it.next()
        println("Received message: (" + record.key() + ", " + record.value() + ") at offset " + record.offset())
      }
    }
  }
}

class RebalanceListener extends ConsumerRebalanceListener {
  override def onPartitionsAssigned(collection: util.Collection[TopicPartition]): Unit = {
    print("Assigned Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }

  override def onPartitionsRevoked(collection: util.Collection[TopicPartition]): Unit = {
    print("Revoked Partitions:")
    val it = collection.iterator()
    while (it.hasNext) {
      print(it.next().partition() + ",")
    }
    println
  }
} 
Example 19
Source File: LatencyConsumerExample.scala    From kafka_training   with Apache License 2.0
package com.malaska.kafka.training


import java.util
import java.util.{Collections, Properties}

import net.liftweb.json.DefaultFormats
import net.liftweb.json.Serialization.read
import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}

import scala.collection.mutable

object LatencyConsumerExample {
  def main(args:Array[String]): Unit = {
    val kafkaServerURL = args(0)
    val kafkaServerPort = args(1)
    val topic = args(2)

    println("Setting up parameters")
    val props = new Properties()
    props.put("bootstrap.servers", kafkaServerURL + ":" + kafkaServerPort)
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "TrainingConsumer")
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
    props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000")
    props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "30000")
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")


    println("Creating Consumer")
    val consumer = new KafkaConsumer[String,String](props)
    consumer.subscribe(Collections.singletonList(topic))

    implicit val formats = DefaultFormats

    var maxLatency = 0L
    var minLatency = 100000L
    var latencyN = 0f
    var latencyCount = 0L
    val lastNLatencies = new mutable.MutableList[Long]

    println("Starting Consumer")
    while (true) {
      val records = consumer.poll(1000)
      val it = records.iterator()
      while (it.hasNext) {
        val record = it.next()
        val exampleMessage = read[ExampleMessage](record.value())
        val latency = System.currentTimeMillis() - exampleMessage.sentTime
        maxLatency = Math.max(latency, maxLatency)
        minLatency = Math.min(latency, minLatency)
        latencyN += latency
        latencyCount += 1
        lastNLatencies += latency


        if (latencyCount % 10 == 0) {
          println("MessageCount:" + latencyCount +
            ",MaxLatency:" + maxLatency +
            ",MinLatency:" + minLatency +
            ",AverageLatency:" + (latencyN/latencyCount) +
            ",LastN:(" + lastNLatencies.mkString(",") + ")")
          lastNLatencies.clear()
        }
      }
    }
  }
} 
Example 20
Source File: ProcessingKafkaApplication.scala    From Akka-Cookbook   with MIT License
package com.packt.chapter8

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerSettings, Subscriptions}
import akka.stream.{ActorMaterializer, ClosedShape}
import akka.stream.scaladsl.{Flow, GraphDSL, RunnableGraph, Sink, Source}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, StringDeserializer, StringSerializer}

import scala.concurrent.duration._

object ProcessingKafkaApplication extends App {
  implicit val actorSystem = ActorSystem("SimpleStream")
  implicit val actorMaterializer = ActorMaterializer()

  val bootstrapServers = "localhost:9092"
  val kafkaTopic = "akka_streams_topic"
  val partition = 0
  val subscription = Subscriptions.assignment(new TopicPartition(kafkaTopic, partition))

  val consumerSettings = ConsumerSettings(actorSystem, new ByteArrayDeserializer, new StringDeserializer)
    .withBootstrapServers(bootstrapServers)
    .withGroupId("akka_streams_group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  val producerSettings = ProducerSettings(actorSystem, new ByteArraySerializer, new StringSerializer)
    .withBootstrapServers(bootstrapServers)

  val runnableGraph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder =>
    import GraphDSL.Implicits._

    val tickSource = Source.tick(0 seconds, 5 seconds, "Hello from Akka Streams using Kafka!")
    val kafkaSource = Consumer.plainSource(consumerSettings, subscription)
    val kafkaSink = Producer.plainSink(producerSettings)
    val printlnSink = Sink.foreach(println)

    val mapToProducerRecord = Flow[String].map(elem => new ProducerRecord[Array[Byte], String](kafkaTopic, elem))
    val mapFromConsumerRecord = Flow[ConsumerRecord[Array[Byte], String]].map(record => record.value())

    tickSource  ~> mapToProducerRecord   ~> kafkaSink
    kafkaSource ~> mapFromConsumerRecord ~> printlnSink

    ClosedShape
  })

  runnableGraph.run()
} 
Example 21
Source File: EmbeddedKafkaCustomConfigSpec.scala    From embedded-kafka   with MIT License
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport {
  final val TwoMegabytes   = 2097152
  final val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig =
        Map(
          KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
          KafkaConfig.MessageMaxBytesProp      -> s"$ThreeMegabytes"
        )

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(
          ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes"
        )

      implicit val customKafkaConfig: EmbeddedKafkaConfig =
        EmbeddedKafkaConfig(
          customBrokerProperties = customBrokerConfig,
          customProducerProperties = customProducerConfig,
          customConsumerProperties = customConsumerConfig
        )

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic      = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Iterator.continually(Random.nextPrintableChar) take length mkString
} 
Example 22
Source File: ConsumerSettings.scala    From zio-kafka   with Apache License 2.0
package zio.kafka.consumer

import org.apache.kafka.clients.consumer.ConsumerConfig
import zio.duration._
import zio.kafka.consumer.Consumer.OffsetRetrieval

case class ConsumerSettings(
  bootstrapServers: List[String],
  properties: Map[String, AnyRef],
  closeTimeout: Duration,
  pollInterval: Duration,
  pollTimeout: Duration,
  perPartitionChunkPrefetch: Int,
  offsetRetrieval: OffsetRetrieval = OffsetRetrieval.Auto()
) {
  private[this] def autoOffsetResetConfig = offsetRetrieval match {
    case OffsetRetrieval.Auto(reset) => Map(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> reset.toConfig)
    case OffsetRetrieval.Manual(_)   => Map.empty
  }

  def driverSettings: Map[String, AnyRef] =
    Map(
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG  -> bootstrapServers.mkString(","),
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "false"
    ) ++ autoOffsetResetConfig ++ properties

  def withBootstrapServers(servers: List[String]): ConsumerSettings =
    copy(bootstrapServers = servers)

  def withCloseTimeout(timeout: Duration): ConsumerSettings =
    copy(closeTimeout = timeout)

  def withClientId(clientId: String): ConsumerSettings =
    withProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId)

  def withGroupId(groupId: String): ConsumerSettings =
    withProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId)

  def withOffsetRetrieval(retrieval: OffsetRetrieval): ConsumerSettings =
    copy(offsetRetrieval = retrieval)

  def withPerPartitionChunkPrefetch(prefetch: Int): ConsumerSettings =
    copy(perPartitionChunkPrefetch = prefetch)

  def withPollInterval(interval: Duration): ConsumerSettings =
    copy(pollInterval = interval)

  def withPollTimeout(timeout: Duration): ConsumerSettings =
    copy(pollTimeout = timeout)

  def withProperty(key: String, value: AnyRef): ConsumerSettings =
    copy(properties = properties + (key -> value))

  def withProperties(kvs: (String, AnyRef)*): ConsumerSettings =
    withProperties(kvs.toMap)

  def withProperties(kvs: Map[String, AnyRef]): ConsumerSettings =
    copy(properties = properties ++ kvs)
}

object ConsumerSettings {
  def apply(bootstrapServers: List[String]): ConsumerSettings =
    new ConsumerSettings(
      bootstrapServers = bootstrapServers,
      properties = Map(),
      closeTimeout = 30.seconds,
      pollInterval = 50.millis,
      pollTimeout = 50.millis,
      perPartitionChunkPrefetch = 2,
      offsetRetrieval = OffsetRetrieval.Auto()
    )
} 
Example 23
Source File: KafkaEventProducer.scala    From openwhisk   with Apache License 2.0
package org.apache.openwhisk.core.database.cosmosdb.cache

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.scaladsl.Producer
import akka.kafka.{ProducerMessage, ProducerSettings}
import akka.stream.scaladsl.{Keep, Sink, Source}
import akka.stream.{ActorMaterializer, OverflowStrategy, QueueOfferResult}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.openwhisk.connector.kafka.KamonMetricsReporter

import scala.collection.immutable.Seq
import scala.concurrent.{ExecutionContext, Future, Promise}

case class KafkaEventProducer(
  settings: ProducerSettings[String, String],
  topic: String,
  eventProducerConfig: EventProducerConfig)(implicit system: ActorSystem, materializer: ActorMaterializer)
    extends EventProducer {
  private implicit val executionContext: ExecutionContext = system.dispatcher

  private val queue = Source
    .queue[(Seq[String], Promise[Done])](eventProducerConfig.bufferSize, OverflowStrategy.dropNew) //TODO Use backpressure
    .map {
      case (msgs, p) =>
        ProducerMessage.multi(msgs.map(newRecord), p)
    }
    .via(Producer.flexiFlow(producerSettings))
    .map {
      case ProducerMessage.MultiResult(_, passThrough) =>
        passThrough.success(Done)
      case _ => // only multi-mode messages are sent, so other result types need not be handled
    }
    .toMat(Sink.ignore)(Keep.left)
    .run

  override def send(msg: Seq[String]): Future[Done] = {
    val promise = Promise[Done]
    queue.offer(msg -> promise).flatMap {
      case QueueOfferResult.Enqueued    => promise.future
      case QueueOfferResult.Dropped     => Future.failed(new Exception("Kafka request queue is full."))
      case QueueOfferResult.QueueClosed => Future.failed(new Exception("Kafka request queue was closed."))
      case QueueOfferResult.Failure(f)  => Future.failed(f)
    }
  }

  def close(): Future[Done] = {
    queue.complete()
    queue.watchCompletion()
  }

  private def newRecord(msg: String) = new ProducerRecord[String, String](topic, "messages", msg)

  private def producerSettings =
    settings.withProperty(ConsumerConfig.METRIC_REPORTER_CLASSES_CONFIG, KamonMetricsReporter.name)
} 
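The queue-plus-flexiFlow pattern above can be reduced to a standalone sketch with plain String records; the broker address, topic name and buffer size below are assumptions, not taken from OpenWhisk:

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.scaladsl.Producer
import akka.kafka.{ProducerMessage, ProducerSettings}
import akka.stream.scaladsl.{Keep, Sink, Source}
import akka.stream.{ActorMaterializer, OverflowStrategy}
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.StringSerializer

import scala.collection.immutable.Seq
import scala.concurrent.Promise

object QueueProducerSketch extends App {
  implicit val system: ActorSystem = ActorSystem("queue-producer-sketch")
  implicit val materializer: ActorMaterializer = ActorMaterializer()

  val producerSettings = ProducerSettings(system, new StringSerializer, new StringSerializer)
    .withBootstrapServers("localhost:9092") // assumed local broker

  // A bounded queue feeds batches into ProducerMessage.multi; the per-batch promise is
  // completed once Kafka has acknowledged every record of the batch.
  val queue = Source
    .queue[(Seq[String], Promise[Done])](100, OverflowStrategy.dropNew)
    .map { case (msgs, p) =>
      ProducerMessage.multi(msgs.map(m => new ProducerRecord[String, String]("events", m)), p)
    }
    .via(Producer.flexiFlow(producerSettings))
    .map {
      case ProducerMessage.MultiResult(_, passThrough) => passThrough.success(Done)
      case _                                           => // only multi-mode messages are sent
    }
    .toMat(Sink.ignore)(Keep.left)
    .run()

  // queue.offer(Seq("a", "b") -> Promise[Done]()) enqueues a batch, mirroring `send` above.
}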
Example 24
Source File: OrderConsumer.scala    From kafka-k8s-monitoring   with MIT License 5 votes vote down vote up
package com.xebia.orders

import akka.Done
import akka.actor.ActorSystem
import akka.kafka.scaladsl._
import akka.kafka._
import akka.stream.{ActorMaterializer, Materializer}
import akka.stream.scaladsl.Sink
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization._

object OrderConsumer extends App {

  private implicit val actorSystem = ActorSystem("orders")
  implicit val mat: Materializer = ActorMaterializer()
  implicit val ec = actorSystem.dispatcher

  val settings = ConsumerSettings(actorSystem, new StringDeserializer, new ByteArrayDeserializer)
    .withBootstrapServers("kafka:9092")
    .withGroupId("my-group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  Consumer.plainSource(settings, Subscriptions.topics("orders"))
    .map(_.value())
    .map(new String(_))
    .map(println)
    .runWith(Sink.ignore).onComplete {
      _ =>
        println("Stream is dead!")
        sys.exit(1)
    }


} 
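A hypothetical producer counterpart for exercising the consumer above locally; the payloads are made up, while the topic and broker address mirror the example:

import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{ByteArraySerializer, StringSerializer}

object OrderProducerSketch extends App {
  private implicit val actorSystem = ActorSystem("orders")
  implicit val mat = ActorMaterializer()

  // Serializers mirror the consumer's String key / byte-array value deserializers.
  val producerSettings = ProducerSettings(actorSystem, new StringSerializer, new ByteArraySerializer)
    .withBootstrapServers("kafka:9092")

  Source(1 to 10)
    .map(n => new ProducerRecord[String, Array[Byte]]("orders", s"order-$n", s"payload-$n".getBytes("UTF-8")))
    .runWith(Producer.plainSink(producerSettings))
}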
Example 25
Source File: ConsumerStream.scala    From reactive-kafka-microservice-template   with Apache License 2.0 5 votes vote down vote up
package com.omearac.consumers

import akka.actor.{ActorRef, ActorSystem}
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerMessage, ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.{Flow, Sink}
import com.omearac.shared.EventMessages.FailedMessageConversion
import com.omearac.shared.JsonMessageConversion.Conversion
import com.omearac.shared.{AkkaStreams, EventSourcing}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.Future



trait ConsumerStream extends AkkaStreams with EventSourcing {
    implicit val system: ActorSystem
    def self: ActorRef


    def createStreamSink(consumerActorSink : ActorRef) = {
        Sink.actorRefWithAck(consumerActorSink, "STREAM_INIT", "OK", "STREAM_DONE")
    }

    def createStreamSource(consumerProperties: Map[String,String])  = {
        val kafkaMBAddress = consumerProperties("bootstrap-servers")
        val groupID = consumerProperties("groupId")
        val topicSubscription = consumerProperties("subscription-topic")
        val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
            .withBootstrapServers(kafkaMBAddress)
            .withGroupId(groupID)
            .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

        Consumer.committableSource(consumerSettings, Subscriptions.topics(topicSubscription))
    }

    def createStreamFlow[msgType: Conversion] = {
        Flow[ConsumerMessage.CommittableMessage[Array[Byte], String]]
            .map(msg => (msg.committableOffset, Conversion[msgType].convertFromJson(msg.record.value)))
            //Publish the conversion error event messages returned from the JSONConversion
            .map (tuple => publishConversionErrors[msgType](tuple))
            .filter(result => result.isRight)
            .map(test => test.right.get)
            //Group the commit offsets and correctly converted messages for more efficient Kafka commits
            .batch(max = 20, tuple => (CommittableOffsetBatch.empty.updated(tuple._1), ArrayBuffer[msgType](tuple._2))) {
                (tupleOfCommitOffsetAndMsgs, tuple) =>
                    (tupleOfCommitOffsetAndMsgs._1.updated(tuple._1), tupleOfCommitOffsetAndMsgs._2 :+ tuple._2)
            }
            //Take the first element of the tuple (set of commit numbers) to add to kafka commit log and then return the collection of grouped case class messages
            .mapAsync(4)(tupleOfCommitOffsetAndMsgs => commitOffsetsToKafka[msgType](tupleOfCommitOffsetAndMsgs))
            .map(msgGroup => msgGroup._2)
    }

    def commitOffsetsToKafka[msgType](tupleOfCommitOffsetAndMsgs: (ConsumerMessage.CommittableOffsetBatch, ArrayBuffer[msgType])) = Future {
        (tupleOfCommitOffsetAndMsgs._1.commitScaladsl(), tupleOfCommitOffsetAndMsgs._2)
    }

    def publishConversionErrors[msgType](tupleOfCommitOffsetAndConversionResults: (ConsumerMessage.CommittableOffset, Either[FailedMessageConversion,msgType]))
    : Either[Unit,(ConsumerMessage.CommittableOffset,msgType)] = {

        if (tupleOfCommitOffsetAndConversionResults._2.isLeft) {

            //Publish a local event that there was a failure in conversion
            publishLocalEvent(tupleOfCommitOffsetAndConversionResults._2.left.get)

            //Commit the Kafka Offset to acknowledge that the message was consumed
            Left(tupleOfCommitOffsetAndConversionResults._1.commitScaladsl())
        }
        else
            Right((tupleOfCommitOffsetAndConversionResults._1, tupleOfCommitOffsetAndConversionResults._2.right.get))
    }
} 
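The offset-batching trick in createStreamFlow can be shown without the project-specific Conversion and EventSourcing traits; the broker address, topic and group id in this sketch are assumptions:

import akka.actor.ActorSystem
import akka.kafka.ConsumerMessage.CommittableOffsetBatch
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Sink
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.ArrayBuffer

object BatchedCommitSketch extends App {
  implicit val system: ActorSystem = ActorSystem("batched-commit")
  implicit val mat: ActorMaterializer = ActorMaterializer()
  import system.dispatcher

  val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
    .withBootstrapServers("localhost:9092")
    .withGroupId("batched-commit-group")
    .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

  Consumer.committableSource(consumerSettings, Subscriptions.topics("events"))
    .map(msg => (msg.committableOffset, msg.record.value))
    // Fold up to 20 offsets (and their payloads) into a single CommittableOffsetBatch
    .batch(max = 20, first => (CommittableOffsetBatch.empty.updated(first._1), ArrayBuffer(first._2))) {
      case ((batch, values), (offset, value)) => (batch.updated(offset), values += value)
    }
    .mapAsync(4) { case (batch, values) => batch.commitScaladsl().map(_ => values) }
    .runWith(Sink.foreach(values => println(s"Committed a batch of ${values.size} messages")))
}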
Example 26
Source File: WebSocket.scala    From trucking-iot   with Apache License 2.0 5 votes vote down vote up
package controllers

import javax.inject.{Inject, Singleton}

import akka.actor.{Actor, ActorRef, ActorSystem, Props}
import akka.kafka.scaladsl.Consumer
import akka.kafka.{ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Sink
import akka.stream.{Materializer, ThrottleMode}
import com.typesafe.config.ConfigFactory
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{ByteArrayDeserializer, StringDeserializer}
import play.api.libs.streams.ActorFlow
import play.api.mvc.{Controller, WebSocket}

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.concurrent.duration._


//@Singleton
class KafkaWebSocket @Inject() (implicit system: ActorSystem, materializer: Materializer) extends Controller {

  def kafkaWS = WebSocket.accept[String, String] { request =>
    ActorFlow.actorRef(out => KafkaWSActor.props(out))
  }

  object KafkaWSActor {
    def props(outRef: ActorRef) = Props(new KafkaWSActor(outRef))
  }

  class KafkaWSActor(outRef: ActorRef) extends Actor {

    val config = ConfigFactory.load()
    val combinedConfig = ConfigFactory.defaultOverrides()
      .withFallback(config)
      .withFallback(ConfigFactory.defaultApplication())
      .getConfig("trucking-web-application.backend")

    val consumerSettings = ConsumerSettings(system, new ByteArrayDeserializer, new StringDeserializer)
      //.withBootstrapServers("sandbox-hdf.hortonworks.com:6667")
      .withBootstrapServers(combinedConfig.getString("kafka.bootstrap-servers"))
      .withGroupId("group1")
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    Consumer.committableSource(consumerSettings, Subscriptions.topics("trucking_data_joined"))
      .mapAsync(1) { msg => Future(outRef ! msg.record.value).map(_ => msg) }
      //.mapAsync(1) { msg => msg.committableOffset.commitScaladsl() } // TODO: Disabling commits for debug
      .throttle(1, 250.milliseconds, 1, ThrottleMode.Shaping)
      .runWith(Sink.ignore)

    def receive = {
      case msg: String => outRef ! s"Ack: $msg"
    }
  }

} 
Example 27
Source File: WordCountConsumer.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package alpakka.kafka

import akka.Done
import akka.actor.{ActorSystem, Props}
import akka.kafka.scaladsl.Consumer.DrainingControl
import akka.kafka.scaladsl.{Committer, Consumer}
import akka.kafka.{CommitterSettings, ConsumerSettings, Subscriptions}
import akka.stream.scaladsl.Sink
import akka.util.Timeout
import alpakka.kafka.TotalFake.{IncrementMessage, IncrementWord}
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.{LongDeserializer, StringDeserializer}

import scala.concurrent.Future
import scala.concurrent.duration._


object WordCountConsumer extends App {
  implicit val system = ActorSystem("WordCountConsumer")
  implicit val ec = system.dispatcher

  val total = system.actorOf(Props[TotalFake], "totalFake")

  val committerSettings = CommitterSettings(system).withMaxBatch(1)

  def createConsumerSettings(group: String): ConsumerSettings[String, java.lang.Long] = {
    ConsumerSettings(system, new StringDeserializer, new LongDeserializer)
      .withBootstrapServers("localhost:9092")
      .withGroupId(group)
      //Define consumer behavior upon starting to read a partition for which it does not have a committed offset or if the committed offset it has is invalid
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
  }

  def createAndRunConsumerWordCount(id: String) = {
    Consumer.committableSource(createConsumerSettings("wordcount consumer group"), Subscriptions.topics("wordcount-output"))
      .mapAsync(1) { msg =>
        //println(s"$id - Offset: ${msg.record.offset()} - Partition: ${msg.record.partition()} Consume msg with key: ${msg.record.key()} and value: ${msg.record.value()}")
        if (msg.record.key().equalsIgnoreCase("fakeNews")) { //hardcoded because WordCountProducer.fakeNewsKeyword does not work
          import akka.pattern.ask
          implicit val askTimeout: Timeout = Timeout(3.seconds)
          (total ? IncrementWord(msg.record.value.toInt, id))
            .mapTo[Done]
            .map(_ => msg.committableOffset)
        } else {
          Future(msg).map(_ => msg.committableOffset)
        }
      }
      .via(Committer.flow(committerSettings))
      .toMat(Sink.seq)(DrainingControl.apply)
      .run()
  }

  def createAndRunConsumerMessageCount(id: String) = {
    Consumer.committableSource(createConsumerSettings("messagecount consumer group"), Subscriptions.topics("messagecount-output"))
      .mapAsync(1) { msg =>
        //println(s"$id - Offset: ${msg.record.offset()} - Partition: ${msg.record.partition()} Consume msg with key: ${msg.record.key()} and value: ${msg.record.value()}")
        import akka.pattern.ask
        implicit val askTimeout: Timeout = Timeout(3.seconds)
        (total ? IncrementMessage(msg.record.value.toInt, id))
          .mapTo[Done]
          .map(_ => msg.committableOffset)
      }
      .via(Committer.flow(committerSettings))
      .toMat(Sink.seq)(DrainingControl.apply)
      .run()
  }

  val drainingControlW1 = createAndRunConsumerWordCount("W.1")
  val drainingControlW2 = createAndRunConsumerWordCount("W.2")
  val drainingControlM = createAndRunConsumerMessageCount("M")


  sys.addShutdownHook{
    println("Got control-c cmd from shell, about to shutdown...")
    drainingControlW1.drainAndShutdown()
    drainingControlW2.drainAndShutdown()
    drainingControlM.drainAndShutdown()
  }
} 
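For local experimentation, a hypothetical producer that feeds the "wordcount-output" topic consumed above; the broker address and counts are made up:

import akka.actor.ActorSystem
import akka.kafka.ProducerSettings
import akka.kafka.scaladsl.Producer
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.common.serialization.{LongSerializer, StringSerializer}

object WordCountTestProducer extends App {
  implicit val system: ActorSystem = ActorSystem("WordCountTestProducer")
  implicit val mat: ActorMaterializer = ActorMaterializer()

  val producerSettings = ProducerSettings(system, new StringSerializer, new LongSerializer)
    .withBootstrapServers("localhost:9092")

  // Publish a few fakeNews counts so the consumer's IncrementWord branch is exercised.
  Source(1 to 5)
    .map(i => new ProducerRecord[String, java.lang.Long]("wordcount-output", "fakeNews", Long.box(i.toLong)))
    .runWith(Producer.plainSink(producerSettings))
}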
Example 28
Source File: ConfManager.scala    From HadoopLearning   with MIT License 5 votes vote down vote up
package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
  * Spark Streaming configuration helpers.
  *
  * @author liumm
  * @since 2018-07-27 20:27
  */
object ConfManager {

  /**
    * Maximum number of records to persist per batch.
    */
  val maxRecords = 1000

  /**
    * Builds the Kafka parameters for a stream.
    *
    * @param streamConf stream configuration (brokers, group id, topic pattern)
    * @return the consumer config map and the compiled topic pattern
    */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] = {
    // Legacy-style parameters ("metadata.broker.list", "smallest") kept for the older
    // Kafka 0.8 metadata APIs.
    val kafkaParams = new HashMap[String, String]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers)
    kafkaParams += ("metadata.broker.list" -> streamConf.brokers)
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest")
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId)
    kafkaParams.toMap
  }

  /**
    * Builds the Kafka consumer configuration.
    *
    * @return the Kafka consumer configuration map
    */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] = {
    val kafkaParams = new HashMap[String, Object]()

    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers)
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> groupId)
    kafkaParams += (ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])

    // Fetch at most 3 MB per partition and at most 100 records per poll
    kafkaParams += (ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> Integer.valueOf(3 * 1024 * 1024))
    kafkaParams += (ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> Integer.valueOf(100))

    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest")
    // Disable Kafka's automatic offset commits
    kafkaParams += (ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))

    kafkaParams.toMap
  }

  def newStreamConf() = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }

} 
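A sketch of how these parameter maps are typically consumed, assuming spark-streaming-kafka-0-10 is on the classpath; the application name, master and batch interval are arbitrary:

import com.utils.ConfManager
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object DirectStreamSketch {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(
      new SparkConf().setAppName("direct-stream-sketch").setMaster("local[2]"), Seconds(5))

    // Reuse the consumer config map and topic pattern built by ConfManager above.
    val (kafkaParams, topicPattern) = ConfManager.kafkaParam(ConfManager.newStreamConf())

    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.SubscribePattern[String, String](topicPattern, kafkaParams))

    stream.map(_.value()).print()
    ssc.start()
    ssc.awaitTermination()
  }
}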
Example 29
Source File: TestStreamsConfig.scala    From scalatest-embedded-kafka   with MIT License 5 votes vote down vote up
package net.manub.embeddedkafka.streams

import java.nio.file.Files

import net.manub.embeddedkafka.EmbeddedKafkaConfig
import org.apache.kafka.clients.consumer.{ConsumerConfig, OffsetResetStrategy}
import org.apache.kafka.streams.StreamsConfig


trait TestStreamsConfig {

  def streamConfig(streamName: String,
                   extraConfig: Map[String, AnyRef] = Map.empty)(
      implicit kafkaConfig: EmbeddedKafkaConfig): StreamsConfig = {
    import scala.collection.JavaConverters._

    val defaultConfig = Map(
      StreamsConfig.APPLICATION_ID_CONFIG -> streamName,
      StreamsConfig.BOOTSTRAP_SERVERS_CONFIG -> s"localhost:${kafkaConfig.kafkaPort}",
      StreamsConfig.STATE_DIR_CONFIG -> Files
        .createTempDirectory(streamName)
        .toString,
      // force stream consumers to start reading from the beginning so as not to lose messages
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> OffsetResetStrategy.EARLIEST.toString.toLowerCase
    )
    val configOverwrittenByExtra = defaultConfig ++
      extraConfig
    new StreamsConfig(configOverwrittenByExtra.asJava)
  }
} 
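A small usage sketch, assuming the trait can be mixed in directly and an embedded broker on the ports shown; the ports and the extra property are arbitrary:

import net.manub.embeddedkafka.EmbeddedKafkaConfig
import net.manub.embeddedkafka.streams.TestStreamsConfig
import org.apache.kafka.streams.StreamsConfig

object StreamConfigSketch extends TestStreamsConfig {
  implicit val kafkaConfig: EmbeddedKafkaConfig =
    EmbeddedKafkaConfig(kafkaPort = 7000, zooKeeperPort = 7001)

  // auto.offset.reset=earliest and a fresh state dir come from the defaults above;
  // num.stream.threads is passed as an extra override.
  val config: StreamsConfig =
    streamConfig("word-count-app", Map(StreamsConfig.NUM_STREAM_THREADS_CONFIG -> "2"))
}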
Example 30
Source File: CachedKafkaConsumer.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import java.{util => ju}

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

import org.apache.spark.{SparkEnv, SparkException, TaskContext}
import org.apache.spark.internal.Logging



private[kafka010] object CachedKafkaConsumer extends Logging {

  // `CacheKey`, `cache` and the `CachedKafkaConsumer` class are defined elsewhere in the
  // original source file; only the cache-lookup logic is shown here.

  def getOrCreate(
      topic: String,
      partition: Int,
      kafkaParams: ju.Map[String, Object]): CachedKafkaConsumer = synchronized {
    val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
    val topicPartition = new TopicPartition(topic, partition)
    val key = CacheKey(groupId, topicPartition)

    // If this is reattempt at running the task, then invalidate cache and start with
    // a new consumer
    if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) {
      cache.remove(key)
      new CachedKafkaConsumer(topicPartition, kafkaParams)
    } else {
      if (!cache.containsKey(key)) {
        cache.put(key, new CachedKafkaConsumer(topicPartition, kafkaParams))
      }
      cache.get(key)
    }
  }
} 
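The same get-or-create idea, reduced to a standalone sketch. This is not Spark's implementation: the plain HashMap cache, the byte-array value type and the explicit partition assignment are assumptions, and the caller is expected to supply deserializer classes in kafkaParams.

import java.{util => ju}

import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

object ConsumerCacheSketch {
  private case class CacheKey(groupId: String, topicPartition: TopicPartition)

  // One consumer per (group.id, topic-partition), reused across calls.
  private val cache = new ju.HashMap[CacheKey, KafkaConsumer[Array[Byte], Array[Byte]]]()

  def getOrCreate(
      topic: String,
      partition: Int,
      kafkaParams: ju.Map[String, Object]): KafkaConsumer[Array[Byte], Array[Byte]] = synchronized {
    val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
    val key = CacheKey(groupId, new TopicPartition(topic, partition))
    if (!cache.containsKey(key)) {
      // kafkaParams must contain key/value deserializers, as in the Spark example above.
      val consumer = new KafkaConsumer[Array[Byte], Array[Byte]](kafkaParams)
      consumer.assign(ju.Collections.singletonList(key.topicPartition))
      cache.put(key, consumer)
    }
    cache.get(key)
  }
}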
Example 31
Source File: EmbeddedKafkaCustomConfigSpec.scala    From scalatest-embedded-kafka   with MIT License 4 votes vote down vote up
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec
    extends EmbeddedKafkaSpecSupport
    with EmbeddedKafka {
  val TwoMegabytes = 2097152
  val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow pass additional producer parameters" in {
      val customBrokerConfig =
        Map(KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
            KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes")

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(
          ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes")

      implicit val customKafkaConfig =
        EmbeddedKafkaConfig(customBrokerProperties = customBrokerConfig,
                            customProducerProperties = customProducerConfig,
                            customConsumerProperties = customConsumerConfig)

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Stream.continually(Random.nextPrintableChar) take length mkString
}