org.apache.kafka.connect.source.SourceRecord Scala Examples

The following examples show how to use org.apache.kafka.connect.source.SourceRecord. Each example is taken from an open-source Kafka Connect project; the source file and project are noted above each listing.
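Before the examples, a brief orientation: a SourceRecord pairs a source-partition map and a source-offset map (which Kafka Connect persists so a task can resume where it left off) with a target topic and an optional key and value, each accompanied by a Connect Schema. The minimal sketch below uses illustrative names only ("example.txt", "position", "my-topic", "key-1" are not taken from any of the projects that follow) and shows the two constructor shapes used most often in the examples: value-only and keyed.

import java.util.Collections

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

object SourceRecordSketch {
  // Where the data came from and how far it has been read; Connect stores these maps as offsets.
  private val sourcePartition = Collections.singletonMap("file", "example.txt")
  private val sourceOffset = Collections.singletonMap("position", Long.box(42L))

  // Value-only constructor: (sourcePartition, sourceOffset, topic, valueSchema, value)
  val valueOnly = new SourceRecord(
    sourcePartition, sourceOffset, "my-topic",
    Schema.STRING_SCHEMA, "hello")

  // Keyed constructor: (sourcePartition, sourceOffset, topic, keySchema, key, valueSchema, value)
  val keyed = new SourceRecord(
    sourcePartition, sourceOffset, "my-topic",
    Schema.STRING_SCHEMA, "key-1",
    Schema.STRING_SCHEMA, "hello")
}

Several of the examples also use the overloads that additionally take an Integer Kafka partition and a record timestamp.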
Example 1
Source File: TwitterStatusReader.scala    From kafka-tweet-producer   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util
import java.util.concurrent.{TimeUnit, LinkedBlockingQueue, Executors}
import com.eneco.trading.kafka.connect.twitter.domain.TwitterStatus
import com.twitter.hbc.httpclient.BasicClient
import com.twitter.hbc.twitter4j.Twitter4jStatusClient
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import twitter4j._
import scala.collection.JavaConverters._
import Extensions._

class StatusEnqueuer(queue: LinkedBlockingQueue[Status]) extends StatusListener with Logging {
  override def onStallWarning(stallWarning: StallWarning) = log.warn("onStallWarning")
  override def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) = log.info("onDeletionNotice")

  override def onScrubGeo(l: Long, l1: Long) = {
    log.debug(s"onScrubGeo $l $l1")
  }

  override def onStatus(status: Status) = {
    log.debug("onStatus")
    queue.put(status)
  }

  override def onTrackLimitationNotice(i: Int) = log.info(s"onTrackLimitationNotice $i")
  override def onException(e: Exception)= log.warn("onException " + e.toString)
}

trait StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord
}

object StatusToStringKeyValue extends StatusToSourceRecord {
  def convert (status: Status, topic: String): SourceRecord = {
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      Schema.STRING_SCHEMA,
      status.getText)
  }
}

object StatusToTwitterStatusStructure extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    //val ts = TwitterStatus.struct(TwitterStatus(status))
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      TwitterStatus.schema,
      TwitterStatus.struct(status))
  }
}


  // Note: the enclosing TwitterStatusReader class body is elided in this excerpt;
  // only its stop() method, which shuts down the underlying hbc BasicClient, remains.
  def stop() = {
    log.info("Stop Twitter client")
    client.stop()
  }
}
Example 2
Source File: HiveSource.scala    From stream-reactor   with Apache License 2.0
package com.landoop.streamreactor.connect.hive.source

import com.landoop.streamreactor.connect.hive
import com.landoop.streamreactor.connect.hive._
import com.landoop.streamreactor.connect.hive.formats.{HiveFormat, HiveReader, Record}
import com.landoop.streamreactor.connect.hive.source.config.HiveSourceConfig
import com.landoop.streamreactor.connect.hive.source.mapper.{PartitionValueMapper, ProjectionMapper}
import com.landoop.streamreactor.connect.hive.source.offset.HiveSourceOffsetStorageReader
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.hive.metastore.IMetaStoreClient
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._


class HiveSource(db: DatabaseName,
                 tableName: TableName,
                 topic: Topic,
                 offsetReader: HiveSourceOffsetStorageReader,
                 config: HiveSourceConfig)
                (implicit client: IMetaStoreClient, fs: FileSystem) extends Iterator[SourceRecord] {

  val tableConfig = config.tableOptions.filter(_.tableName == tableName).find(_.topic == topic)
    .getOrElse(sys.error(s"Cannot find table configuration for ${db.value}.${tableName.value} => ${topic.value}"))

  private val table = client.getTable(db.value, tableName.value)
  private val format = HiveFormat(hive.serde(table))
  private val metastoreSchema = HiveSchemas.toKafka(table)
  private val parts = TableFileScanner.scan(db, tableName)

  private val readers = parts.map { case (path, partition) =>

    val fns: Seq[Struct => Struct] = Seq(
      partition.map(new PartitionValueMapper(_).map _),
      tableConfig.projection.map(new ProjectionMapper(_).map _)
    ).flatten
    val mapper: Struct => Struct = Function.chain(fns)

    val sourceOffset = offsetReader.offset(SourcePartition(db, tableName, topic, path)).getOrElse(SourceOffset(0))

    new HiveReader {
      lazy val reader = format.reader(path, sourceOffset.rowNumber, metastoreSchema)
      override def iterator: Iterator[Record] = reader.iterator.map { record =>
        Record(mapper(record.struct), record.path, record.offset)
      }
      override def close(): Unit = reader.close()
    }
  }

  private val iterator: Iterator[Record] = readers.map(_.iterator).reduce(_ ++ _).take(tableConfig.limit)

  override def hasNext: Boolean = iterator.hasNext

  override def next(): SourceRecord = {

    val record = iterator.next
    val sourcePartition = SourcePartition(db, tableName, topic, record.path)
    val offset = SourceOffset(record.offset)

    new SourceRecord(
      fromSourcePartition(sourcePartition).asJava,
      fromSourceOffset(offset).asJava,
      topic.value,
      record.struct.schema,
      record.struct
    )
  }

  def close(): Unit = {
    readers.foreach(_.close())
  }
} 
Example 3
Source File: CoapReaderFactory.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.coap.source

import java.util
import java.util.concurrent.LinkedBlockingQueue

import com.datamountaineer.streamreactor.connect.coap.configs.CoapSetting
import com.datamountaineer.streamreactor.connect.coap.connection.CoapManager
import com.datamountaineer.streamreactor.connect.coap.domain.CoapMessageConverter
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.SourceRecord
import org.eclipse.californium.core.{CoapHandler, CoapObserveRelation, CoapResponse, WebLink}


class MessageHandler(resource: String, topic: String, queue: LinkedBlockingQueue[SourceRecord]) extends CoapHandler with StrictLogging {
  val converter = CoapMessageConverter()

  override def onError(): Unit = {
    logger.warn(s"Message dropped for $topic!")
  }

  override def onLoad(response: CoapResponse): Unit = {
    val records = converter.convert(resource, topic, response.advanced())
    logger.debug(s"Received ${response.advanced().toString} for $topic")
    logger.debug(s"Records in queue ${queue.size()} for $topic")
    queue.put(records)
  }
} 
Example 4
Source File: CoapSourceTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.coap.source

import java.util
import java.util.concurrent.LinkedBlockingQueue

import com.datamountaineer.streamreactor.connect.coap.configs.{CoapConstants, CoapSettings, CoapSourceConfig}
import com.datamountaineer.streamreactor.connect.queues.QueueHelpers
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

import scala.collection.JavaConverters._


class CoapSourceTask extends SourceTask with StrictLogging {
  private var readers: Set[CoapReader] = _
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private val queue = new LinkedBlockingQueue[SourceRecord]()
  private var batchSize: Int = CoapConstants.BATCH_SIZE_DEFAULT
  private var lingerTimeout = CoapConstants.SOURCE_LINGER_MS_DEFAULT
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {
    logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/coap-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    val config = CoapSourceConfig(conf)
    enableProgress = config.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED)
    val settings = CoapSettings(config)
    batchSize = config.getInt(CoapConstants.BATCH_SIZE)
    lingerTimeout = config.getInt(CoapConstants.SOURCE_LINGER_MS)
    enableProgress = config.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED)
    readers = CoapReaderFactory(settings, queue)
  }

  override def poll(): util.List[SourceRecord] = {
    val records = new util.ArrayList[SourceRecord]()

    QueueHelpers.drainWithTimeoutNoGauva(records, batchSize, lingerTimeout * 1000000 , queue)

    if (enableProgress) {
      progressCounter.update(records.asScala.toVector)
    }
    records
  }

  override def stop(): Unit = {
    logger.info("Stopping Coap source and closing connections.")
    readers.foreach(_.stop())
    progressCounter.empty
  }

  override def version: String = manifest.version()
} 
Example 5
Source File: SimpleFileConverter.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import com.datamountaineer.streamreactor.connect.ftp.source.SourceRecordProducers.SourceRecordProducer
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.collection.JavaConverters._


class SimpleFileConverter(props: util.Map[String, String], offsetStorageReader : OffsetStorageReader)
  extends FileConverter(props, offsetStorageReader) {

  val cfg = new FtpSourceConfig(props)
  val metaStore = new ConnectFileMetaDataStore(offsetStorageReader)
  val recordConverter: SourceRecordConverter = cfg.sourceRecordConverter
  val recordMaker: SourceRecordProducer = cfg.keyStyle match {
    case KeyStyle.String => SourceRecordProducers.stringKeyRecord
    case KeyStyle.Struct => SourceRecordProducers.structKeyRecord
  }

  override def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord] = {
    metaStore.set(meta.attribs.path, meta)
    recordConverter.convert(recordMaker(metaStore, topic, meta, body)).asScala
  }

  override def getFileOffset(path: String): Option[FileMetaData] = metaStore.get(path)
} 
Example 6
Source File: FileConverter.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.util.{Failure, Success, Try}


abstract class FileConverter(props: util.Map[String, String], offsetStorageReader : OffsetStorageReader) {
  def convert(topic: String, meta: FileMetaData, body: FileBody) : Seq[SourceRecord]
  def getFileOffset(path: String) : Option[FileMetaData]
}

object FileConverter {
  def apply(klass: Class[_], props: util.Map[String, String], offsetStorageReader: OffsetStorageReader) : FileConverter = {
    Try(klass.getDeclaredConstructor(classOf[util.Map[String, String]], classOf[OffsetStorageReader])
      .newInstance(props, offsetStorageReader).asInstanceOf[FileConverter]) match {
      case Success(fc) => fc
      case Failure(err) => throw new Exception(s"Failed to create ${klass} as instance of ${classOf[FileConverter]}", err)
    }
  }
} 
Example 7
Source File: MaxLinesFileConverter.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.ftp.source

import java.util

import com.datamountaineer.streamreactor.connect.ftp.source.SourceRecordProducers.SourceRecordProducer
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.storage.OffsetStorageReader

import scala.collection.JavaConverters._


class MaxLinesFileConverter(props: util.Map[String, String], offsetStorageReader : OffsetStorageReader)
  extends FileConverter(props, offsetStorageReader) {

  val cfg = new FtpSourceConfig(props)
  val metaStore = new ConnectFileMetaDataStore(offsetStorageReader)
  val recordConverter: SourceRecordConverter = cfg.sourceRecordConverter
  val recordMaker: SourceRecordProducer = cfg.keyStyle match {
    case KeyStyle.String => SourceRecordProducers.stringKeyRecord
    case KeyStyle.Struct => SourceRecordProducers.structKeyRecord
  }
  val lineSep = System.getProperty("line.separator").getBytes

  override def convert(topic: String, meta: FileMetaData, body: FileBody): Seq[SourceRecord] = {
    if (meta.attribs.size == meta.offset) {
      // Last slice of the file: there may be no line separator at the end of the file
      metaStore.set(meta.attribs.path, meta)
      recordConverter.convert(recordMaker(metaStore, topic, meta, body)).asScala
    } else {
      val offsetInSlice = findEndPositionOfLastMatch(lineSep, body.bytes)
      // TODO: warn that no line separator was found; suggest that the line size may exceed the slice size
      val offset = meta.offset - (body.bytes.size-offsetInSlice)
      metaStore.set(meta.attribs.path, meta.offset(offset))
      val trimmedBody = FileBody(util.Arrays.copyOfRange(body.bytes, 0, offsetInSlice), 0)
      recordConverter.convert(recordMaker(metaStore, topic, meta, trimmedBody)).asScala
    }
  }

  def findEndPositionOfLastMatch(bytesToMatch: Array[Byte], content: Array[Byte]) : Int = {
    for (pos <- content.size to bytesToMatch.size by -1){
      val window = util.Arrays.copyOfRange(content, pos - bytesToMatch.size, pos)
      if (window.deep == bytesToMatch.deep) return pos
    }
    -1
  }

  override def getFileOffset(path: String): Option[FileMetaData] = {
    metaStore.get(path)
  }
} 
Example 8
Source File: SourceRecordProducers.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.ftp.source

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord


object SourceRecordProducers {
  type SourceRecordProducer = (ConnectFileMetaDataStore, String, FileMetaData, FileBody) => SourceRecord

  val fileInfoSchema = SchemaBuilder.struct()
    .field("name", Schema.STRING_SCHEMA)
    .field("offset", Schema.INT64_SCHEMA)
    .build()

  def stringKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord =
    new SourceRecord(
      store.fileMetasToConnectPartition(meta), // source part
      store.fileMetasToConnectOffset(meta), // source off
      topic, //topic
      Schema.STRING_SCHEMA, // key sch
      meta.attribs.path, // key
      Schema.BYTES_SCHEMA, // val sch
      body.bytes // val
    )

  def structKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord = {
    new SourceRecord(
      store.fileMetasToConnectPartition(meta), // source part
      store.fileMetasToConnectOffset(meta), // source off
      topic, //topic
      fileInfoSchema, // key sch
      new Struct(fileInfoSchema)
        .put("name",meta.attribs.path)
        .put("offset",body.offset),
      Schema.BYTES_SCHEMA, // val sch
      body.bytes // val
    )
  }
} 
Example 9
Source File: TwitterReader.scala    From kafka-tweet-producer   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util.concurrent.LinkedBlockingQueue

import com.twitter.hbc.ClientBuilder
import com.twitter.hbc.core.Constants
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint
import com.twitter.hbc.core.endpoint.DefaultStreamingEndpoint
import com.twitter.hbc.core.processor.StringDelimitedProcessor
import com.twitter.hbc.core.endpoint.Location
import com.twitter.hbc.httpclient.auth.OAuth1
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import twitter4j.Status
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._


object TwitterReader {
  def apply(config: TwitterSourceConfig, context: SourceTaskContext) = {
    //endpoints
    val endpoint: DefaultStreamingEndpoint = if (config.getString(TwitterSourceConfig.STREAM_TYPE).equals(TwitterSourceConfig.STREAM_TYPE_SAMPLE)) {
      new StatusesSampleEndpoint()
    } else {
      val trackEndpoint = new StatusesFilterEndpoint()
      val terms = config.getList(TwitterSourceConfig.TRACK_TERMS) 
      if (!terms.isEmpty) {
        trackEndpoint.trackTerms(terms)
      }
      val locs = config.getList(TwitterSourceConfig.TRACK_LOCATIONS)
      if (!locs.isEmpty) {
        val locations = locs.toList.map({ x => Double.box(x.toDouble)}).grouped(4).toList
            .map({ l => new Location(new Location.Coordinate(l(0), l(1)), new Location.Coordinate(l(2), l(3)))})
            .asJava
        trackEndpoint.locations(locations)
      }
      val follow = config.getList(TwitterSourceConfig.TRACK_FOLLOW) 
      if (!follow.isEmpty) {
        val users = follow.toList.map({ x => Long.box(x.trim.toLong)}).asJava
        trackEndpoint.followings(users)
      }
      trackEndpoint
    }
    endpoint.stallWarnings(false)
    val language = config.getList(TwitterSourceConfig.LANGUAGE) 
    if (!language.isEmpty) {
      // endpoint.languages(language) doesn't work as intended!
      endpoint.addQueryParameter(TwitterSourceConfig.LANGUAGE, language.toList.mkString(","))
    }

    //twitter auth stuff
    val auth = new OAuth1(config.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG),
      config.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value,
      config.getString(TwitterSourceConfig.TOKEN_CONFIG),
      config.getPassword(TwitterSourceConfig.SECRET_CONFIG).value)

    //batch size to take from the queue
    val batchSize = config.getInt(TwitterSourceConfig.BATCH_SIZE)
    val batchTimeout = config.getDouble(TwitterSourceConfig.BATCH_TIMEOUT)

    //The Kafka topic to append to
    val topic = config.getString(TwitterSourceConfig.TOPIC)

    //queue for client to buffer to
    val queue = new LinkedBlockingQueue[String](10000)

    //how the output is formatted
    val statusConverter = config.getString(TwitterSourceConfig.OUTPUT_FORMAT) match {
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRING => StatusToStringKeyValue
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRUCTURED => StatusToTwitterStatusStructure
    }

    //build basic client
    val client = new ClientBuilder()
      .name(config.getString(TwitterSourceConfig.TWITTER_APP_NAME))
      .hosts(Constants.STREAM_HOST)
      .endpoint(endpoint)
      .authentication(auth)
      .processor(new StringDelimitedProcessor(queue))
      .build()

    new TwitterStatusReader(client = client, rawQueue = queue, batchSize = batchSize, 
        batchTimeout = batchTimeout, topic = topic, statusConverter = statusConverter)
  }
} 
Example 10
Source File: TwitterSourceTask.scala    From kafka-tweet-producer   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}


class TwitterSourceTask extends SourceTask with Logging {
  private var reader: Option[TwitterStatusReader] = None // use None rather than null for an uninitialized Option

  override def poll(): util.List[SourceRecord] = {
    require(reader.isDefined, "Twitter client not initialized!")
    reader.get.poll()
  }

  override def start(props: util.Map[String, String]): Unit = {
    val sourceConfig = new TwitterSourceConfig(props)
    reader = Some(TwitterReader(config = sourceConfig, context = context))
  }

  override def stop() = {
    reader.foreach(r=>r.stop())
  }
  override def version(): String = ""
} 
Example 11
Source File: MqttSourceTask.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.mqtt.source

import java.io.File
import java.util

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.mqtt.config.{MqttConfigConstants, MqttSourceConfig, MqttSourceSettings}
import com.datamountaineer.streamreactor.connect.mqtt.connection.MqttClientConnectionFn
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

class MqttSourceTask extends SourceTask with StrictLogging {
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private var mqttManager: Option[MqttManager] = None
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {

    logger.info(scala.io.Source.fromInputStream(this.getClass.getResourceAsStream("/mqtt-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    val settings = MqttSourceSettings(MqttSourceConfig(conf))

    settings.sslCACertFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CA_CERT_CONFIG} is invalid. Can't locate $file")
      }
    }

    settings.sslCertFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CERT_CONFIG} is invalid. Can't locate $file")
      }
    }

    settings.sslCertKeyFile.foreach { file =>
      if (!new File(file).exists()) {
        throw new ConfigException(s"${MqttConfigConstants.SSL_CERT_KEY_CONFIG} is invalid. Can't locate $file")
      }
    }

    val convertersMap = settings.sourcesToConverters.map { case (topic, clazz) =>
      logger.info(s"Creating converter instance for $clazz")
      val converter = Try(Class.forName(clazz).newInstance()) match {
        case Success(value) => value.asInstanceOf[Converter]
        case Failure(_) => throw new ConfigException(s"Invalid ${MqttConfigConstants.KCQL_CONFIG} is invalid. $clazz should have an empty ctor!")
      }
      import scala.collection.JavaConverters._
      converter.initialize(conf.asScala.toMap)
      topic -> converter
    }

    logger.info("Starting Mqtt source...")
    mqttManager = Some(new MqttManager(MqttClientConnectionFn.apply, convertersMap, settings))
    enableProgress = settings.enableProgress
  }

  
  // Note: the poll() method of this task is elided in this excerpt.
  override def stop(): Unit = {
    logger.info("Stopping Mqtt source.")
    mqttManager.foreach(_.close())
    progressCounter.empty
  }

  override def version: String = manifest.version()
} 
Example 12
Source File: TwitterReader.scala    From kafka-connect-twitter   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util.concurrent.LinkedBlockingQueue

import com.twitter.hbc.ClientBuilder
import com.twitter.hbc.core.Constants
import com.twitter.hbc.core.endpoint.StatusesFilterEndpoint
import com.twitter.hbc.core.endpoint.StatusesSampleEndpoint
import com.twitter.hbc.core.endpoint.DefaultStreamingEndpoint
import com.twitter.hbc.core.processor.StringDelimitedProcessor
import com.twitter.hbc.core.endpoint.Location
import com.twitter.hbc.httpclient.auth.OAuth1
import org.apache.kafka.connect.source.{SourceRecord, SourceTaskContext}
import twitter4j.Status
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._


object TwitterReader {
  def apply(config: TwitterSourceConfig, context: SourceTaskContext) = {
    //endpoints
    val endpoint: DefaultStreamingEndpoint = if (config.getString(TwitterSourceConfig.STREAM_TYPE).equals(TwitterSourceConfig.STREAM_TYPE_SAMPLE)) {
      new StatusesSampleEndpoint()
    } else {
      val trackEndpoint = new StatusesFilterEndpoint()
      val terms = config.getList(TwitterSourceConfig.TRACK_TERMS) 
      if (!terms.isEmpty) {
        trackEndpoint.trackTerms(terms)
      }
      val locs = config.getList(TwitterSourceConfig.TRACK_LOCATIONS)
      if (!locs.isEmpty) {
        val locations = locs.toList.map({ x => Double.box(x.toDouble)}).grouped(4).toList
            .map({ l => new Location(new Location.Coordinate(l(0), l(1)), new Location.Coordinate(l(2), l(3)))})
            .asJava
        trackEndpoint.locations(locations)
      }
      val follow = config.getList(TwitterSourceConfig.TRACK_FOLLOW) 
      if (!follow.isEmpty) {
        val users = follow.toList.map({ x => Long.box(x.trim.toLong)}).asJava
        trackEndpoint.followings(users)
      }
      trackEndpoint
    }
    endpoint.stallWarnings(false)
    val language = config.getList(TwitterSourceConfig.LANGUAGE) 
    if (!language.isEmpty) {
      // endpoint.languages(language) doesn't work as intended!
      endpoint.addQueryParameter(TwitterSourceConfig.LANGUAGE, language.toList.mkString(","))
    }

    //twitter auth stuff
    val auth = new OAuth1(config.getString(TwitterSourceConfig.CONSUMER_KEY_CONFIG),
      config.getPassword(TwitterSourceConfig.CONSUMER_SECRET_CONFIG).value,
      config.getString(TwitterSourceConfig.TOKEN_CONFIG),
      config.getPassword(TwitterSourceConfig.SECRET_CONFIG).value)

    //batch size to take from the queue
    val batchSize = config.getInt(TwitterSourceConfig.BATCH_SIZE)
    val batchTimeout = config.getDouble(TwitterSourceConfig.BATCH_TIMEOUT)

    //The Kafka topic to append to
    val topic = config.getString(TwitterSourceConfig.TOPIC)

    //queue for client to buffer to
    val queue = new LinkedBlockingQueue[String](10000)

    //how the output is formatted
    val statusConverter = config.getString(TwitterSourceConfig.OUTPUT_FORMAT) match {
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRING => StatusToStringKeyValue
      case TwitterSourceConfig.OUTPUT_FORMAT_ENUM_STRUCTURED => StatusToTwitterStatusStructure
    }

    //build basic client
    val client = new ClientBuilder()
      .name(config.getString(TwitterSourceConfig.TWITTER_APP_NAME))
      .hosts(Constants.STREAM_HOST)
      .endpoint(endpoint)
      .authentication(auth)
      .processor(new StringDelimitedProcessor(queue))
      .build()

    new TwitterStatusReader(client = client, rawQueue = queue, batchSize = batchSize, 
        batchTimeout = batchTimeout, topic = topic, statusConverter = statusConverter)
  }
} 
Example 13
Source File: TwitterSourceTask.scala    From kafka-connect-twitter   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}


class TwitterSourceTask extends SourceTask with Logging {
  private var reader: Option[TwitterStatusReader] = None // use None rather than null for an uninitialized Option

  override def poll(): util.List[SourceRecord] = {
    require(reader.isDefined, "Twitter client not initialized!")
    reader.get.poll()
  }

  override def start(props: util.Map[String, String]): Unit = {
    val sourceConfig = new TwitterSourceConfig(props)
    reader = Some(TwitterReader(config = sourceConfig, context = context))
  }

  override def stop() = {
    reader.foreach(r=>r.stop())
  }
  override def version(): String = ""
} 
Example 14
Source File: TwitterStatusReader.scala    From kafka-connect-twitter   with Apache License 2.0
package com.eneco.trading.kafka.connect.twitter

import java.util
import java.util.concurrent.{TimeUnit, LinkedBlockingQueue, Executors}
import com.eneco.trading.kafka.connect.twitter.domain.TwitterStatus
import com.twitter.hbc.httpclient.BasicClient
import com.twitter.hbc.twitter4j.Twitter4jStatusClient
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import twitter4j._
import scala.collection.JavaConverters._
import Extensions._

class StatusEnqueuer(queue: LinkedBlockingQueue[Status]) extends StatusListener with Logging {
  override def onStallWarning(stallWarning: StallWarning) = log.warn("onStallWarning")
  override def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) = log.info("onDeletionNotice")

  override def onScrubGeo(l: Long, l1: Long) = {
    log.debug(s"onScrubGeo $l $l1")
  }

  override def onStatus(status: Status) = {
    log.debug("onStatus")
    queue.put(status)
  }

  override def onTrackLimitationNotice(i: Int) = log.info(s"onTrackLimitationNotice $i")
  override def onException(e: Exception)= log.warn("onException " + e.toString)
}

trait StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord
}

object StatusToStringKeyValue extends StatusToSourceRecord {
  def convert (status: Status, topic: String): SourceRecord = {
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      Schema.STRING_SCHEMA,
      status.getText,
      status.getCreatedAt.getTime)
  }
}

object StatusToTwitterStatusStructure extends StatusToSourceRecord {
  def convert(status: Status, topic: String): SourceRecord = {
    //val ts = TwitterStatus.struct(TwitterStatus(status))
    new SourceRecord(
      Map("tweetSource" -> status.getSource).asJava, //source partitions?
      Map("tweetId" -> status.getId).asJava, //source offsets?
      topic,
      null,
      Schema.STRING_SCHEMA,
      status.getUser.getScreenName,
      TwitterStatus.schema,
      TwitterStatus.struct(status),
      status.getCreatedAt.getTime)
  }
}


  // Note: the enclosing TwitterStatusReader class body is elided in this excerpt;
  // only its stop() method, which shuts down the underlying hbc BasicClient, remains.
  def stop() = {
    log.info("Stop Twitter client")
    client.stop()
  }
}
Example 15
Source File: JsonPassThroughConverter.scala    From kafka-connect-common   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source

import java.util.Collections

import com.landoop.json.sql.JacksonJson
import org.apache.kafka.connect.source.SourceRecord


class JsonPassThroughConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")

    val json = new String(bytes, "utf-8")
    val jsonNode = JacksonJson.asJson(json)
    var keysValue = keys.flatMap { key =>
      Option(KeyExtractor.extract(jsonNode, key.split('.').toVector)).map(_.toString)
    }.mkString(keyDelimiter)

    // If keys are not provided, default one will be constructed
    if (keysValue == "") {
      keysValue = s"$sourceTopic$keyDelimiter$messageId"
    }

    new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
      null,
      kafkaTopic,
      null,
      keysValue,
      null,
      json)
  }
} 
Example 16
Source File: AvroConverter.scala    From kafka-connect-common   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source

import java.io.File
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import io.confluent.connect.avro.AvroData
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.apache.avro.{Schema => AvroSchema}
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException


class AvroConverter extends Converter {
  private val avroData = new AvroData(8)
  private var sourceToSchemaMap: Map[String, AvroSchema] = Map.empty
  private var avroReadersMap: Map[String, GenericDatumReader[GenericRecord]] = Map.empty

  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    Option(bytes) match {
      case None =>
        new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          avroData.toConnectSchema(sourceToSchemaMap(sourceTopic)),
          null)
      case Some(_) =>
        val reader = avroReadersMap.getOrElse(sourceTopic.toLowerCase, throw new ConfigException(s"Invalid ${AvroConverter.SCHEMA_CONFIG} is not configured for $sourceTopic"))
        val decoder = DecoderFactory.get().binaryDecoder(bytes, null)
        val record = reader.read(null, decoder)
        val schemaAndValue = avroData.toConnectData(sourceToSchemaMap(sourceTopic.toLowerCase), record)
        val value = schemaAndValue.value()
        value match {
          case s: Struct if keys.nonEmpty =>
            val keysValue = keys.flatMap { key =>
              Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
            }.mkString(keyDelimiter)
            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              Schema.STRING_SCHEMA,
              keysValue,
              schemaAndValue.schema(),
              schemaAndValue.value())
          case _ =>
            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              MsgKey.schema,
              MsgKey.getStruct(sourceTopic, messageId),
              schemaAndValue.schema(),
              schemaAndValue.value())
        }

    }
  }

  override def initialize(config: Map[String, String]): Unit = {
    sourceToSchemaMap = AvroConverter.getSchemas(config)
    avroReadersMap = sourceToSchemaMap.map { case (key, schema) =>
      key -> new GenericDatumReader[GenericRecord](schema)
    }
  }
}

object AvroConverter {
  val SCHEMA_CONFIG = "connect.source.converter.avro.schemas"

  def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = {
    config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided"))
      .toString
      .split(';')
      .filter(_.trim.nonEmpty)
      .map(_.split("="))
      .map {
        case Array(source, path) =>
          val file = new File(path)
          if (!file.exists()) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!")
          }
          val s = source.trim.toLowerCase()
          if (s.isEmpty) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path")
          }
          s -> new AvroSchema.Parser().parse(file)
        case other => throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The format is Mqtt_Source->AVRO_FILE")
      }.toMap
  }
} 
Example 17
Source File: JsonSimpleConverter.scala    From kafka-connect-common   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source

import java.nio.charset.Charset
import java.util
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord


class JsonSimpleConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys:Seq[String] = Seq.empty,
                       keyDelimiter:String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, Charset.defaultCharset)
    val schemaAndValue = JsonSimpleConverter.convert(sourceTopic, json)
    val value = schemaAndValue.value()
    value match {
      case s:Struct if keys.nonEmpty =>
        val keysValue = keys.flatMap { key =>
          Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
        }.mkString(keyDelimiter)

        new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          Schema.STRING_SCHEMA,
          keysValue,
          schemaAndValue.schema(),
          schemaAndValue.value())
      case _=>
        new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          MsgKey.schema,
          MsgKey.getStruct(sourceTopic, messageId),
          schemaAndValue.schema(),
          schemaAndValue.value())
    }

  }
}

object JsonSimpleConverter {

  import org.json4s._
  import org.json4s.native.JsonMethods._

  def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str))

  def convert(name: String, value: JValue): SchemaAndValue = {
    value match {
      case JArray(arr) =>
        val values = new util.ArrayList[AnyRef]()
        val sv = convert(name, arr.head)
        values.add(sv.value())
        arr.tail.foreach { v => values.add(convert(name, v).value()) }

        val schema = SchemaBuilder.array(sv.schema()).optional().build()
        new SchemaAndValue(schema, values)
      case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b)
      case JDecimal(d) =>
        val schema = Decimal.builder(d.scale).optional().build()
        new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal))
      case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d)
      case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries)
      case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l)
      case JNull | JNothing => new SchemaAndValue(Schema.STRING_SCHEMA, null)
      case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s)
      case JObject(values) =>
        val builder = SchemaBuilder.struct().name(name.replace("/", "_"))

        val fields = values.map { case (n, v) =>
          val schemaAndValue = convert(n, v)
          builder.field(n, schemaAndValue.schema())
          n -> schemaAndValue.value()
        }.toMap
        val schema = builder.build()

        val struct = new Struct(schema)
        fields.foreach { case (field, v) => struct.put(field, v) }

        new SchemaAndValue(schema, struct)
    }
  }
} 
Example 18
Source File: JsonOptNullConverter.scala    From kafka-connect-common   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source

import java.nio.charset.Charset
import java.util
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord


class JsonOptNullConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys:Seq[String] = Seq.empty,
                       keyDelimiter:String = "."): SourceRecord = {
    require(bytes != null, s"Invalid $bytes parameter")
    val json = new String(bytes, Charset.defaultCharset)
    val schemaAndValue = JsonOptNullConverter.convert(sourceTopic, json)
    val value = schemaAndValue.value()
    value match {
      case s:Struct if keys.nonEmpty =>
        val keysValue = keys.flatMap { key =>
          Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
        }.mkString(keyDelimiter)

        new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          Schema.STRING_SCHEMA,
          keysValue,
          schemaAndValue.schema(),
          schemaAndValue.value())
      case _=>
        new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          MsgKey.schema,
          MsgKey.getStruct(sourceTopic, messageId),
          schemaAndValue.schema(),
          schemaAndValue.value())
    }

  }
}

object JsonOptNullConverter {

  import org.json4s._
  import org.json4s.native.JsonMethods._

  def convert(name: String, str: String): SchemaAndValue = convert(name, parse(str))

  def convert(name: String, value: JValue): SchemaAndValue = {
    value match {
      case JArray(arr) =>
        val values = new util.ArrayList[AnyRef]()
        val sv = convert(name, arr.head)
        values.add(sv.value())
        arr.tail.foreach { v => values.add(convert(name, v).value()) }

        val schema = SchemaBuilder.array(sv.schema()).optional().build()
        new SchemaAndValue(schema, values)
      case JBool(b) => new SchemaAndValue(Schema.BOOLEAN_SCHEMA, b)
      case JDecimal(d) =>
        val schema = Decimal.builder(d.scale).optional().build()
        new SchemaAndValue(schema, Decimal.fromLogical(schema, d.bigDecimal))
      case JDouble(d) => new SchemaAndValue(Schema.FLOAT64_SCHEMA, d)
      case JInt(i) => new SchemaAndValue(Schema.INT64_SCHEMA, i.toLong) //on purpose! LONG (we might get later records with long entries)
      case JLong(l) => new SchemaAndValue(Schema.INT64_SCHEMA, l)
      case JNull | JNothing => new SchemaAndValue(Schema.OPTIONAL_STRING_SCHEMA, null)
      case JString(s) => new SchemaAndValue(Schema.STRING_SCHEMA, s)
      case JObject(values) =>
        val builder = SchemaBuilder.struct().name(name.replace("/", "_"))

        val fields = values.map { case (n, v) =>
          val schemaAndValue = convert(n, v)
          builder.field(n, schemaAndValue.schema())
          n -> schemaAndValue.value()
        }.toMap
        val schema = builder.build()

        val struct = new Struct(schema)
        fields.foreach { case (field, v) => struct.put(field, v) }

        new SchemaAndValue(schema, struct)
    }
  }
} 
Example 19
Source File: BytesConverter.scala    From kafka-connect-common   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.converters.source

import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

class BytesConverter extends Converter {
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    new SourceRecord(Collections.singletonMap(Converter.TopicKey, sourceTopic),
      null,
      kafkaTopic,
      MsgKey.schema,
      MsgKey.getStruct(sourceTopic, messageId),
      Schema.BYTES_SCHEMA,
      bytes)
  }
} 
Example 20
Source File: TableQuerier.scala    From kafka-connect-sap   with Apache License 2.0
package com.sap.kafka.connect.source.querier

import com.sap.kafka.client.hana.HANAJdbcClient
import com.sap.kafka.connect.config.{BaseConfig, BaseConfigConstants}
import com.sap.kafka.connect.config.hana.HANAConfig
import com.sap.kafka.utils.hana.HANAJdbcTypeConverter
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord
import org.slf4j.LoggerFactory

import scala.util.Random

abstract class TableQuerier(mode: String, tableOrQuery: String,
                            topic: String, config: BaseConfig,
                            var jdbcClient: Option[HANAJdbcClient])
                extends Comparable[TableQuerier] {
  var tableName: String = if (mode.equals(BaseConfigConstants.QUERY_MODE_TABLE)) tableOrQuery else null
  var query: String = if (mode.equals(BaseConfigConstants.QUERY_MODE_SQL)) tableOrQuery else null

  var lastUpdate: Long = 0
  var schema: Schema = _
  var queryString: Option[String] = None
  var resultList: Option[List[Struct]] = None

  val log = LoggerFactory.getLogger(getClass)

  def getLastUpdate(): Long = lastUpdate

  def getOrCreateQueryString(): Option[String] = {
    createQueryString()
    queryString
  }

  def createQueryString(): Unit

  def querying(): Boolean = resultList.isDefined

  def maybeStartQuery(): Unit = {
    if (resultList.isEmpty) {
      schema = getSchema()
      queryString = getOrCreateQueryString()

      val batchMaxRows = config.batchMaxRows
      resultList = getOrCreateJdbcClient().get.executeQuery(schema, queryString.get,
        0, batchMaxRows)
      log.info(resultList.size.toString)
    }
  }

  def extractRecords(): List[SourceRecord]

  def close(now: Long): Unit = {
    resultList = None
    schema = null

    lastUpdate = now
  }

  protected def getOrCreateJdbcClient(): Option[HANAJdbcClient] = {
    if (jdbcClient.isDefined) {
      return jdbcClient
    }

    config match {
      case hanaConfig: HANAConfig => Some(HANAJdbcClient(hanaConfig))
      case _ => throw new RuntimeException("Cannot create Jdbc Client")
    }
  }

  private def getSchema(): Schema = {
    mode match {
      case BaseConfigConstants.QUERY_MODE_TABLE =>
        if (getOrCreateJdbcClient().get.isInstanceOf[HANAJdbcClient]) {
          val metadata = getOrCreateJdbcClient().get.getMetaData(tableOrQuery, None)
          HANAJdbcTypeConverter.convertHANAMetadataToSchema(tableName, metadata)
        } else {
          throw new RuntimeException("Jdbc Client is not available")
        }
      case BaseConfigConstants.QUERY_MODE_SQL =>
        if (getOrCreateJdbcClient().get.isInstanceOf[HANAJdbcClient]) {
          val metadata = getOrCreateJdbcClient().get.getMetadata(tableOrQuery)
          HANAJdbcTypeConverter.convertHANAMetadataToSchema("Query" + Random.nextInt, metadata)
        } else {
          throw new RuntimeException("Jdbc Client is not available")
        }
      case _ =>
        throw new RuntimeException("Other Query modes are not supported")
    }
  }

  override def compareTo(other: TableQuerier): Int = {
    if (this.lastUpdate < other.lastUpdate) {
      -1
    } else if (this.lastUpdate > other.lastUpdate) {
      1 // a greater lastUpdate must compare as greater, not equal
    } else {
      this.tableName.compareTo(other.tableName)
    }
  }
} 
Example 21
Source File: TestMapKeyToString.scala    From kafka-connect-transformers   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.transforms

import java.util

import com.datamountaineer.streamreactor.connect.transforms.MapKeyToString.Value
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.transforms.util.Requirements.requireStruct
import org.scalatest.{Matchers, WordSpec}

import scala.collection.JavaConversions._

class TestMapKeyToString extends WordSpec with Matchers {
  val MAP_SCHEMA = SchemaBuilder.map(Schema.OPTIONAL_INT64_SCHEMA, Schema.OPTIONAL_STRING_SCHEMA).optional().build();

  val FIELDS_CONFIG = "fields"

  "should transform all map key to string schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      FIELDS_CONFIG -> "map1, map2")
    )

    val transformedRecord = transform.apply(mockRecord(true));

    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    schema.field("map1").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map1").get("1").toString shouldBe "value1-1"

    schema.field("map2").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map2").get("1").toString shouldBe "value2-1"
  }

  "should transform only one map key to string schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      FIELDS_CONFIG -> "map1")
    )

    val transformedRecord = transform.apply(mockRecord(true));

    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    schema.field("map1").schema().keySchema().`type`().getName shouldBe "string"
    value.getMap("map1").get("1").toString shouldBe "value1-1"

    schema.field("map2").schema().keySchema().`type`().getName shouldBe "int64"
    value.getMap("map2").get(1L).toString shouldBe "value2-1"
  }

  private def mockRecord(withSchema: Boolean) = {
    val simpleStructSchema = SchemaBuilder.struct.name("name").version(1).doc("doc")
      .field("magic", Schema.OPTIONAL_INT64_SCHEMA)
      .field("map1", MAP_SCHEMA)
      .field("map2", MAP_SCHEMA)
      .build

    val simpleStruct = new Struct(simpleStructSchema)
      .put("magic", 42L)
      .put("map1", new util.HashMap[Long, String]{
        put(1L,"value1-1")
        put(2L,"value1-2")
      })
      .put("map2", new util.HashMap[Long, String]{
        put(1L,"value2-1")
        put(2L,"value2-2")
      })

    new SourceRecord(null, null, "test", 0, if (withSchema) simpleStructSchema else null, simpleStruct)
  }

} 
Example 22
Source File: TestNestingFields.scala    From kafka-connect-transformers   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.transforms

import java.util.Date

import com.datamountaineer.streamreactor.connect.transforms.NestingFields.Value
import org.apache.kafka.connect.data._
import org.apache.kafka.connect.source.SourceRecord
import org.apache.kafka.connect.transforms.util.Requirements.requireStruct
import org.scalatest.{Matchers, WordSpec}

import scala.collection.JavaConversions._

class TestNestingFields extends WordSpec with Matchers {
  val OPTIONAL_TIMESTAMP_SCHEMA = Timestamp.builder().optional().build()
  val OPTIONAL_DECIMAL_SCHEMA = Decimal.builder(18).optional().build()

  private val NESTED_NAME_CONFIG = "nested.name"
  private val FIELDS_CONFIG = "fields"

  "should append another field with two nested fields when have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "dateValue1, decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));

    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    val nestedSchema = schema.field("id").schema()
    val nestedValue =  requireStruct(value.get("id"), null)

    nestedSchema.field("dateValue1").schema().`type`() shouldBe schema.field("dateValue1").schema().`type`()
    nestedValue.get("dateValue1") shouldBe value.get("dateValue1")

    nestedSchema.field("decimalValue1").schema().`type`() shouldBe schema.field("decimalValue1").schema().`type`()
    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  "should append another field with one nested fields when have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));

    val value = requireStruct(transformedRecord.value, null)
    val schema = transformedRecord.valueSchema

    val nestedSchema = schema.field("id").schema()
    val nestedValue =  requireStruct(value.get("id"), null)

    nestedSchema.field("decimalValue1").schema().`type`() shouldBe schema.field("decimalValue1").schema().`type`()
    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  "should append another field with one nested fields when don't have schema" in {
    val transform = new Value[SourceRecord];
    transform.configure(Map(
      NESTED_NAME_CONFIG -> "id",
      FIELDS_CONFIG -> "decimalValue1")
    )

    val transformedRecord = transform.apply(mockRecord(true));

    val value = requireStruct(transformedRecord.value, null)

    val nestedValue =  requireStruct(value.get("id"), null)
    nestedValue.get("decimalValue1") shouldBe value.get("decimalValue1")
  }

  private def mockRecord(withSchema: Boolean) = {
    val simpleStructSchema = SchemaBuilder.struct.name("name").version(1).doc("doc")
      .field("magic", Schema.OPTIONAL_INT64_SCHEMA)
      .field("dateValue1", OPTIONAL_TIMESTAMP_SCHEMA)
      .field("decimalValue1", OPTIONAL_DECIMAL_SCHEMA)
      .build

    val simpleStruct = new Struct(simpleStructSchema)
      .put("magic", 42L)
      .put("dateValue1", new Date())
      .put("decimalValue1", BigDecimal(10.6).bigDecimal.setScale(18))

    new SourceRecord(null, null, "test", 0, if (withSchema) simpleStructSchema else null, simpleStruct)
  }

} 
Example 23
Source File: CassandraSourceTask.scala    From kafka-connect-cassandra   with Apache License 2.0
package com.tuplejump.kafka.connect.cassandra

import java.util.{List => JList, Map => JMap, ArrayList => JArrayList}

import org.apache.kafka.connect.connector.Task
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}


  // Note: the CassandraSourceTask class declaration and its setup code are elided in this excerpt;
  // only poll() and its private paging helper remain.
  override def poll: JList[SourceRecord] = {

    val records = new JArrayList[SourceRecord]()
    val offset = EmptyJMap//context.offsetStorageReader.offset(EmptyJMap) //TODO
    val partition = EmptyJMap //TODO

    for {
      sc       <- taskConfig.source
      iterator <- page(sc)
      row      <- iterator
    } {
      val record = row.as(sc.schema.route.topic, partition, offset)
      records.add(record)
      if (iterator.done) checkpoint = None //TODO
      record
    }

    records
  }

  private def page(sc: SourceConfig): Option[AsyncPagingSourceIterator] = {
    //TODO need CDC: https://github.com/tuplejump/kafka-connector/issues/9
    val query = sc.query match {
      case q if q.hasPatternT =>
        //TODO remove Thread.sleep with better option like timestamp.fromNow...etc
        Thread.sleep(sc.query.pollInterval)
        sc.query.slide
      case q =>
        // TODO typed: https://tuplejump.atlassian.net/browse/DB-56 timeuuid,timestamp...
        // by type: WHERE {columnToMove} > checkpoint.value with columnType
        sc.query
    }

    val rs = session.execute(query.cql)
    if (rs.getAvailableWithoutFetching > 0) Some(new AsyncPagingSourceIterator(rs, sc.options.fetchSize))
    else None
  }
} 
Example 24
Source File: TimeBasedDataService.scala    From kafka-jdbc-connector   with Apache License 2.0
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet, Timestamp}
import java.util.{Date, GregorianCalendar, TimeZone}

import com.agoda.kafka.connector.jdbc.JdbcSourceConnectorConstants
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct.{MsSQL, MySQL}
import com.agoda.kafka.connector.jdbc.models.Mode.TimestampMode
import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import scala.util.Try


case class TimeBasedDataService(databaseProduct: DatabaseProduct,
                                storedProcedureName: String,
                                batchSize: Int,
                                batchSizeVariableName: String,
                                timestampVariableName: String,
                                var timestampOffset: Long,
                                timestampFieldName: String,
                                topic: String,
                                keyFieldOpt: Option[String],
                                dataConverter: DataConverter,
                                calendar: GregorianCalendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"))
                               ) extends DataService {

  override def createPreparedStatement(connection: Connection): Try[PreparedStatement] = Try {
    val preparedStatement = databaseProduct match {
      case MsSQL => connection.prepareStatement(s"EXECUTE $storedProcedureName @$timestampVariableName = ?, @$batchSizeVariableName = ?")
      case MySQL => connection.prepareStatement(s"CALL $storedProcedureName (@$timestampVariableName := ?, @$batchSizeVariableName := ?)")
    }
    preparedStatement.setTimestamp(1, new Timestamp(timestampOffset), calendar)
    preparedStatement.setObject(2, batchSize)
    preparedStatement
  }

  override def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]] = Try {
    val sourceRecords = ListBuffer.empty[SourceRecord]
    var max = timestampOffset
    while (resultSet.next()) {
      dataConverter.convertRecord(schema, resultSet) map { record =>
        val time = record.get(timestampFieldName).asInstanceOf[Date].getTime
        max = if(time > max) {
          keyFieldOpt match {
            case Some(keyField) =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava, topic, null, schema, record.get(keyField), schema, record
              )
            case None           =>
              sourceRecords += new SourceRecord(
                Map(JdbcSourceConnectorConstants.STORED_PROCEDURE_NAME_KEY -> storedProcedureName).asJava,
                Map(TimestampMode.entryName -> time).asJava, topic, schema, record
              )
          }
          time
        } else max
      }
    }
    timestampOffset = max
    sourceRecords
  }

  override def toString: String = {
    s"""
       |{
       |   "name" : "${this.getClass.getSimpleName}"
       |   "mode" : "${TimestampMode.entryName}"
       |   "stored-procedure.name" : "$storedProcedureName"
       |}
    """.stripMargin
  }
} 
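A minimal usage sketch of the service above, assuming a MySQL database, a stored procedure that takes a timestamp and a batch-size parameter, and an available DataConverter instance; all names below (URL, procedure, column, topic, and the `converter` value) are placeholders rather than anything defined by the connector:

import java.sql.DriverManager
import com.agoda.kafka.connector.jdbc.models.DatabaseProduct.MySQL
import scala.concurrent.duration._

// hypothetical wiring, for illustration only
val connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/db", "user", "pass")
val service = TimeBasedDataService(
  databaseProduct       = MySQL,
  storedProcedureName   = "get_events",      // placeholder procedure name
  batchSize             = 100,
  batchSizeVariableName = "batchSize",
  timestampVariableName = "fromTimestamp",
  timestampOffset       = 0L,                // start from the epoch on the first poll
  timestampFieldName    = "updated_at",      // placeholder timestamp column
  topic                 = "events",
  keyFieldOpt           = None,
  dataConverter         = converter          // assumed DataConverter instance provided by the task
)

// getRecords comes from the DataService trait (Example 25); timestampOffset advances as a side effect
val records = service.getRecords(connection, 10.seconds)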
Example 25
Source File: DataService.scala    From kafka-jdbc-connector   with Apache License 2.0 5 votes vote down vote up
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet}

import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord

import scala.concurrent.duration.Duration
import scala.util.Try

trait DataService {


  def getRecords(connection: Connection, timeout: Duration): Try[Seq[SourceRecord]] = {
    for {
      preparedStatement <- createPreparedStatement(connection)
      resultSet         <- executeStoredProcedure(preparedStatement, timeout)
      schema            <- dataConverter.convertSchema(storedProcedureName, resultSet.getMetaData)
      records           <- extractRecords(resultSet, schema)
    } yield records
  }

  protected def createPreparedStatement(connection: Connection): Try[PreparedStatement]

  protected def extractRecords(resultSet: ResultSet, schema: Schema): Try[Seq[SourceRecord]]

  private def executeStoredProcedure(preparedStatement: PreparedStatement, timeout: Duration): Try[ResultSet] = Try {
    preparedStatement.setQueryTimeout(timeout.toSeconds.toInt)
    preparedStatement.executeQuery
  }
} 
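Because each step returns a Try, the for-comprehension short-circuits on the first failure, so callers typically pattern match on the result. A small sketch, assuming `service` is any concrete DataService and `connection` a live java.sql.Connection:

import scala.concurrent.duration._
import scala.util.{Failure, Success}

service.getRecords(connection, 5.seconds) match {
  case Success(records) => records.foreach(r => println(s"${r.topic}: ${r.value}"))
  case Failure(e)       => e.printStackTrace() // statement, schema-conversion or extraction errors all surface here
}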
Example 26
Source File: DataServiceTest.scala    From kafka-jdbc-connector   with Apache License 2.0 5 votes vote down vote up
package com.agoda.kafka.connector.jdbc.services

import java.sql.{Connection, PreparedStatement, ResultSet, ResultSetMetaData}

import com.agoda.kafka.connector.jdbc.utils.DataConverter
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.SourceRecord
import org.scalatest.mockito.MockitoSugar
import org.mockito.Mockito._
import org.scalatest.{Matchers, WordSpec}

import scala.concurrent.duration._
import scala.util.Success

class DataServiceTest extends WordSpec with Matchers with MockitoSugar {

  "Data Service" should {

    val spName = "stored-procedure"
    val connection = mock[Connection]
    val converter = mock[DataConverter]
    val sourceRecord1 = mock[SourceRecord]
    val sourceRecord2 = mock[SourceRecord]
    val resultSet = mock[ResultSet]
    val resultSetMetadata = mock[ResultSetMetaData]
    val preparedStatement = mock[PreparedStatement]
    val schema = mock[Schema]

    val dataService = new DataService {

      override def storedProcedureName: String = spName

      override protected def createPreparedStatement(connection: Connection) = Success(preparedStatement)

      override protected def extractRecords(resultSet: ResultSet, schema: Schema) = Success(Seq(sourceRecord1, sourceRecord2))

      override def dataConverter: DataConverter = converter
    }

    "get records" in {
      doNothing().when(preparedStatement).setQueryTimeout(1)
      when(preparedStatement.executeQuery).thenReturn(resultSet)
      when(resultSet.getMetaData).thenReturn(resultSetMetadata)
      when(converter.convertSchema(spName, resultSetMetadata)).thenReturn(Success(schema))

      dataService.getRecords(connection, 1.second) shouldBe Success(Seq(sourceRecord1, sourceRecord2))

      verify(preparedStatement).setQueryTimeout(1)
      verify(preparedStatement).executeQuery
      verify(resultSet).getMetaData
      verify(converter).convertSchema(spName, resultSetMetadata)
    }
  }
} 
Example 27
Source File: IotHubPartitionSource.scala    From toketi-kafka-connect-iothub   with MIT License 5 votes vote down vote up
// Copyright (c) Microsoft. All rights reserved.

package com.microsoft.azure.iot.kafka.connect.source

import java.util.{Collections, Map}

import com.typesafe.scalalogging.LazyLogging
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.mutable.ListBuffer
import scala.util.control.NonFatal

class IotHubPartitionSource(val dataReceiver: DataReceiver,
    val partition: String,
    val topic: String,
    val batchSize: Int,
    val eventHubName: String,
    val sourcePartition: Map[String, String])
  extends LazyLogging
    with JsonSerialization {

  def getRecords: List[SourceRecord] = {

    logger.debug(s"Polling for data from eventHub $eventHubName partition $partition")
    val list = ListBuffer.empty[SourceRecord]
    try {
      val messages: Iterable[IotMessage] = this.dataReceiver.receiveData(batchSize)

      if (messages.isEmpty) {
        logger.debug(s"Finished processing all messages from eventHub $eventHubName " +
          s"partition ${this.partition}")
      } else {
        logger.debug(s"Received ${messages.size} messages from eventHub $eventHubName " +
          s"partition ${this.partition} (requested $batchSize batch)")

        for (msg: IotMessage <- messages) {

          val kafkaMessage: Struct = IotMessageConverter.getIotMessageStruct(msg)
          val sourceOffset = Collections.singletonMap("EventHubOffset",
            kafkaMessage.getString(IotMessageConverter.offsetKey))
          val sourceRecord = new SourceRecord(sourcePartition, sourceOffset, this.topic, kafkaMessage.schema(),
            kafkaMessage)
          list += sourceRecord
        }
      }
    } catch {
      case NonFatal(e) =>
        val errorMsg = s"Error while getting SourceRecords for eventHub $eventHubName " +
          s"partition $partition. Exception - ${e.toString} Stack trace - ${e.printStackTrace()}"
        logger.error(errorMsg)
        throw new ConnectException(errorMsg, e)
    }
    logger.debug(s"Obtained ${list.length} SourceRecords from IotHub")
    list.toList
  }
} 
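The sourcePartition map passed to the constructor is echoed back on every record, while the offset comes from the IoT message itself. A hedged construction sketch: the partition key name and the other literal values here are hypothetical, and `dataReceiver` is assumed to be an initialised DataReceiver:

import java.util.Collections

val sourcePartition = Collections.singletonMap("EventHubPartition", "0")
val source = new IotHubPartitionSource(dataReceiver, "0", "iot-topic", 100, "my-event-hub", sourcePartition)
val records: List[SourceRecord] = source.getRecords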
Example 28
Source File: SQSSourceTask.scala    From sqs-kafka-connect   with Apache License 2.0 5 votes vote down vote up
package com.hivehome.kafka.connect.sqs

import java.util.{List => JList, Map => JMap}
import javax.jms._

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.util.Try
import scala.util.control.NonFatal

object SQSSourceTask {
  private val SqsQueueField: String = "queue"
  private val MessageId: String = "messageId"
  private val ValueSchema = Schema.STRING_SCHEMA
}

class SQSSourceTask extends SourceTask {
  val logger = LoggerFactory.getLogger(getClass.getName)
  private var conf: Conf = _
  private var consumer: MessageConsumer = null
  // MessageId to MessageHandle used to ack the message on the commitRecord method invocation
  private var unAcknowledgedMessages = Map[String, Message]()

  def version: String = Version()

  def start(props: JMap[String, String]): Unit = {
    conf = Conf.parse(props.asScala.toMap).get

    logger.debug("Creating consumer...")
    synchronized {
      try {
        consumer = SQSConsumer(conf)
        logger.info("Created consumer to  SQS topic {} for reading", conf.queueName)
      }
      catch {
        case NonFatal(e) => logger.error("Exception", e)
      }
    }
  }

  import com.hivehome.kafka.connect.sqs.SQSSourceTask._

  @throws(classOf[InterruptedException])
  def poll: JList[SourceRecord] = {
    def toRecord(msg: Message): SourceRecord = {
      val extracted = MessageExtractor(msg)
      val key = Map(SqsQueueField -> conf.queueName.get).asJava
      val value = Map(MessageId -> msg.getJMSMessageID).asJava
      new SourceRecord(key, value, conf.topicName.get, ValueSchema, extracted)
    }

    assert(consumer != null) // should be initialised as part of start()
    Try {
      Option(consumer.receive).map { msg =>
        logger.info("Received message {}", msg)

        // This operation is not thread-safe; as a result, the plugin is not thread-safe.
        // However, Kafka Connect assigns a single thread to each task and the poll
        // method is always called by that single thread.
        unAcknowledgedMessages = unAcknowledgedMessages.updated(msg.getJMSMessageID, msg)

        toRecord(msg)
      }.toSeq
    }.recover {
      case NonFatal(e) =>
        logger.error("Exception while processing message", e)
        List.empty
    }.get.asJava
  }

  @throws(classOf[InterruptedException])
  override def commitRecord(record: SourceRecord): Unit = {
    val msgId = record.sourceOffset().get(MessageId).asInstanceOf[String]
    val maybeMsg = unAcknowledgedMessages.get(msgId)
    maybeMsg.foreach(_.acknowledge())
    unAcknowledgedMessages = unAcknowledgedMessages - msgId
  }

  def stop() {
    logger.debug("Stopping task")
    synchronized {
      unAcknowledgedMessages = Map()
      try {
        if (consumer != null) {
          consumer.close()
          logger.debug("Closed input stream")
        }
      }
      catch {
        case NonFatal(e) => logger.error("Failed to close consumer stream: ", e)
      }
      this.notify()
    }
  }
} 
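Kafka Connect drives the task by alternating poll and commitRecord. A hedged sketch of that cycle in a standalone harness; the configuration keys expected by Conf.parse are connector-specific, so `props` below is only a placeholder:

import scala.collection.JavaConverters._

// hypothetical harness; `props` is assumed to hold the connector's queue and topic settings
val task = new SQSSourceTask
task.start(props)

task.poll.asScala.foreach { record =>
  // once Kafka acknowledges the produced record, Connect calls commitRecord,
  // which acknowledges the underlying SQS message and drops it from unAcknowledgedMessages
  task.commitRecord(record)
}

task.stop()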
Example 29
Source File: ValidatorTask.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.connector.validation

import java.util
import java.util.concurrent.TimeUnit

import oharastream.ohara.client.configurator.InspectApi.{RdbInfo, RdbQuery}
import oharastream.ohara.client.configurator.{ErrorApi, InspectApi}
import oharastream.ohara.client.database.DatabaseClient
import oharastream.ohara.common.data.Serializer
import oharastream.ohara.common.util.VersionUtils
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import spray.json.{JsObject, _}

import scala.jdk.CollectionConverters._
class ValidatorTask extends SourceTask {
  private[this] var done                       = false
  private[this] var props: Map[String, String] = _
  private[this] val topic: String              = InspectApi.INTERNAL_TOPIC_KEY.topicNameOnKafka
  private[this] var requestId: String          = _
  override def start(props: util.Map[String, String]): Unit = {
    this.props = props.asScala.toMap
    requestId = require(InspectApi.REQUEST_ID)
  }

  override def poll(): util.List[SourceRecord] =
    if (done) {
      // just wait for the configurator to close this connector
      TimeUnit.SECONDS.sleep(2)
      null
    } else
      try information match {
        case query: RdbQuery => toSourceRecord(validate(query))
      } catch {
        case e: Throwable => toSourceRecord(ErrorApi.of(e))
      } finally done = true

  override def stop(): Unit = {
    // do nothing
  }

  override def version(): String = VersionUtils.VERSION

  private[this] def validate(query: RdbQuery): RdbInfo = {
    val client = DatabaseClient.builder.url(query.url).user(query.user).password(query.password).build
    try RdbInfo(
      name = client.databaseType,
      tables = client.tableQuery
        .catalog(query.catalogPattern.orNull)
        .schema(query.schemaPattern.orNull)
        .tableName(query.tableName.orNull)
        .execute()
    )
    finally client.close()
  }

  private[this] def toJsObject: JsObject = props(InspectApi.SETTINGS_KEY).parseJson.asJsObject
  private[this] def information = require(InspectApi.TARGET_KEY) match {
    case InspectApi.RDB_KIND => InspectApi.RDB_QUERY_FORMAT.read(toJsObject)
    case other: String =>
      throw new IllegalArgumentException(
        s"valid targets are ${InspectApi.RDB_KIND}. current is $other"
      )
  }

  private[this] def toSourceRecord(data: Object): util.List[SourceRecord] =
    util.Arrays.asList(
      new SourceRecord(
        null,
        null,
        topic,
        Schema.BYTES_SCHEMA,
        Serializer.STRING.to(requestId),
        Schema.BYTES_SCHEMA,
        Serializer.OBJECT.to(data)
      )
    )

  private[this] def require(key: String): String =
    props.getOrElse(key, throw new IllegalArgumentException(s"the $key is required"))
} 
Example 30
Source File: BulkTableQuerier.scala    From kafka-connect-sap   with Apache License 2.0 5 votes vote down vote up
package com.sap.kafka.connect.source.querier

import com.sap.kafka.client.hana.HANAJdbcClient
import com.sap.kafka.connect.config.{BaseConfig, BaseConfigConstants}
import com.sap.kafka.connect.source.SourceConnectorConstants
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class BulkTableQuerier(mode: String, tableOrQuery: String, tablePartition: Int, topic: String,
                       config: BaseConfig, jdbcClient: Option[HANAJdbcClient])
  extends TableQuerier(mode, tableOrQuery, topic, config, jdbcClient) {
  override def createQueryString(): Unit = {
    mode match {
      case BaseConfigConstants.QUERY_MODE_TABLE =>
        if (tablePartition > 0) {
          queryString = Some(s"select * from $tableName PARTITION($tablePartition)")
        } else {
          queryString = Some(s"select * from $tableName")
        }
      case BaseConfigConstants.QUERY_MODE_SQL =>
        queryString = Some(query)
    }
  }

  override def extractRecords(): List[SourceRecord] = {
    if (resultList.isDefined) {
      resultList.get.map(record => {
        var partition: Map[String, String] = null

        mode match {
          case BaseConfigConstants.QUERY_MODE_TABLE =>
            partition = Map(SourceConnectorConstants.TABLE_NAME_KEY -> tableName)
          case BaseConfigConstants.QUERY_MODE_SQL =>
            val partitionName = "Query"
            partition = Map(SourceConnectorConstants.QUERY_NAME_KEY -> partitionName)
          case _ => throw new ConfigException(s"Unexpected query mode: $mode")
        }
        new SourceRecord(partition.asJava, null, topic,
          getPartition(tablePartition, topic), record.schema(), record)
      })
    }
    else List()
  }

  override def toString: String = "BulkTableQuerier{" +
    "name='" + tableOrQuery + '\'' +
    ", topic='" + topic + '\'' +
    '}'

  
  private def getPartition(tablePartition: Int, topic: String): Int = {
    val topicProperties = config.topicProperties(topic)
    val maxPartitions = topicProperties("partition.count").toInt
    tablePartition % maxPartitions
  }
} 
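getPartition simply folds the table partition into the topic's configured partition count; a worked illustration with hypothetical values:

// hypothetical values, for illustration only
val maxPartitions  = 3                               // topicProperties("partition.count").toInt
val tablePartition = 7
val kafkaPartition = tablePartition % maxPartitions  // == 1: rows from table partition 7 land on Kafka partition 1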
Example 31
Source File: HANASourceTaskConversionTest.scala    From kafka-connect-sap   with Apache License 2.0 5 votes vote down vote up
package com.sap.kafka.connect.source

import com.sap.kafka.client.MetaSchema
import org.apache.kafka.connect.data.Schema.Type
import org.apache.kafka.connect.data.{Field, Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._

class HANASourceTaskConversionTest extends HANASourceTaskTestBase {

  override def beforeAll(): Unit = {
    super.beforeAll()
    task.start(singleTableConfig())
  }

  override def afterAll(): Unit = {
    task.stop()
    super.afterAll()
  }

  test("boolean type") {
    typeConversion(Schema.BOOLEAN_SCHEMA, true, java.lang.Boolean.FALSE,
      Schema.BOOLEAN_SCHEMA, java.lang.Boolean.FALSE)
  }

  test("int type") {
    typeConversion(Schema.INT32_SCHEMA, true, new java.lang.Integer(1),
      Schema.INT32_SCHEMA, new Integer(1))
  }

  test("long type") {
    typeConversion(Schema.INT64_SCHEMA, true, new java.lang.Long(1),
      Schema.INT64_SCHEMA, new java.lang.Long(1))
  }

  test("double type") {
    typeConversion(Schema.FLOAT64_SCHEMA, true, new java.lang.Double(1.0),
      Schema.FLOAT64_SCHEMA, new java.lang.Double(1.0))
  }

  test("string type") {
    typeConversion(Schema.STRING_SCHEMA, true, "'a'",
      Schema.STRING_SCHEMA, "a")
  }

  private def typeConversion(sqlType: Schema, nullable: Boolean,
                             sqlValue: Object, convertedSchema: Schema,
                             convertedValue: Object): Unit = {
    val fields = Seq(new Field("id", 1, sqlType))
    jdbcClient.createTable(Some("TEST"), "EMPLOYEES_SOURCE", MetaSchema(null, fields),
      3000)
    val connection = jdbcClient.getConnection
    val stmt = connection.createStatement()
    stmt.execute("insert into \"TEST\".\"EMPLOYEES_SOURCE\" values(" + sqlValue.toString + ")")
    val records = task.poll()
    validateRecords(records.asScala.toList, convertedSchema, convertedValue)
    stmt.execute("drop table \"TEST\".\"EMPLOYEES_SOURCE\"")
  }

  private def validateRecords(records: List[SourceRecord], expectedFieldSchema: Schema,
                              expectedValue: Object): Unit = {
    assert(records.size === 1)
    val objValue = records.head.value()
    assert(objValue.isInstanceOf[Struct])
    val value = objValue.asInstanceOf[Struct]

    val schema = value.schema()
    assert(Type.STRUCT === schema.`type`())
    val fields = schema.fields()

    assert(fields.size() === 1)

    val fieldSchema = fields.get(0).schema()
    assert(expectedFieldSchema === fieldSchema)

    assert(expectedValue === value.get(fields.get(0)))
  }
} 
Example 32
Source File: Transaction.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.blockchain.data

import java.util

import com.datamountaineer.streamreactor.connect.blockchain.data.Input._
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

case class Transaction(lock_time: Long,
                       ver: Int,
                       size: Long,
                       inputs: Seq[Input],
                       rbf: Option[Boolean],
                       time: Long,
                       tx_index: Long,
                       vin_sz: Int,
                       hash: String,
                       vout_sz: Int,
                       relayed_by: String,
                       out: Seq[Output])


object Transaction {
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.transaction")
    .field("lock_time", Schema.INT64_SCHEMA)
    .field("ver", Schema.INT32_SCHEMA)
    .field("size", Schema.INT64_SCHEMA)
    .field("inputs", SchemaBuilder.array(Input.ConnectSchema).optional().build())
    .field("rbf", Schema.OPTIONAL_BOOLEAN_SCHEMA)
    .field("time", Schema.INT64_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("vin_sz", Schema.INT32_SCHEMA)
    .field("hash", Schema.STRING_SCHEMA)
    .field("vout_sz", Schema.INT32_SCHEMA)
    .field("relayed_by", Schema.STRING_SCHEMA)
    .field("out", SchemaBuilder.array(Output.ConnectSchema).optional().build())
    .build()

  implicit class TransactionToSourceRecordConverter(val tx: Transaction) extends AnyVal {
    def toSourceRecord(topic: String, partition: Int, key: Option[String]): SourceRecord = {
      new SourceRecord(
        null,
        null,
        topic,
        partition,
        key.map(_ => Schema.STRING_SCHEMA).orNull,
        key.orNull,
        ConnectSchema,
        tx.toStruct()
      )
    }

    //private def getOffset() = Collections.singletonMap("position", System.currentTimeMillis())

    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("lock_time", tx.lock_time)
        .put("ver", tx.ver)
        .put("size", tx.size)
        .put("time", tx.time)
        .put("tx_index", tx.tx_index)
        .put("vin_sz", tx.vin_sz)
        .put("hash", tx.hash)
        .put("vout_sz", tx.vout_sz)
        .put("relayed_by", tx.relayed_by)

      tx.out.headOption.foreach { _ =>
        import scala.collection.JavaConverters._
        struct.put("out", tx.out.map(_.toStruct()).asJava)
      }
      tx.rbf.foreach(struct.put("rbf", _))
      tx.inputs.headOption.foreach { _ =>
        val inputs = new util.ArrayList[Struct]
        tx.inputs.foreach(i => inputs.add(i.toStruct()))
        struct.put("inputs", inputs)
      }

      struct
    }
  }

} 
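Since the implicit class lives in the Transaction companion object, it is in scope for any Transaction value without extra imports. A minimal sketch, assuming `tx` is an already-deserialised Transaction and the topic name is a placeholder:

val keyed: SourceRecord   = tx.toSourceRecord("blockchain_transactions", 0, Some(tx.hash)) // keyed by the transaction hash
val unkeyed: SourceRecord = tx.toSourceRecord("blockchain_transactions", 0, None)          // no key and no key schema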
Example 33
Source File: JMSReader.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.jms.source.readers

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.jms.JMSSessionProvider
import com.datamountaineer.streamreactor.connect.jms.config.JMSSettings
import com.datamountaineer.streamreactor.connect.jms.source.domain.JMSStructMessage
import com.typesafe.scalalogging.StrictLogging
import javax.jms.{Message, MessageConsumer}
import org.apache.kafka.connect.source.SourceRecord

import scala.util.Try


class JMSReader(settings: JMSSettings) extends StrictLogging {

  val provider = JMSSessionProvider(settings)
  provider.start()
  val consumers: Vector[(String, MessageConsumer)] = (provider.queueConsumers ++ provider.topicsConsumers).toVector
  val convertersMap: Map[String, Option[Converter]] = settings.settings.map(s => (s.source, s.sourceConverters)).toMap
  val topicsMap: Map[String, String] = settings.settings.map(s => (s.source, s.target)).toMap

  def poll(): Vector[(Message, SourceRecord)] = {
    val messages = consumers
      .flatMap({ case (source, consumer) =>
        (1 to settings.batchSize)
          .flatMap(_ => Option(consumer.receiveNoWait()))
          .map(m => (m, convert(source, topicsMap(source), m)))
      })

    messages
  }

  def convert(source: String, target: String, message: Message): SourceRecord = {
    convertersMap(source) match {
      case Some(c) => c.convert(target, source, message.getJMSMessageID, JMSStructMessage.getPayload(message))
      case None    => JMSStructMessage.getStruct(target, message)
    }
  }

  def stop: Try[Unit] = provider.close()
}

object JMSReader {
  def apply(settings: JMSSettings): JMSReader = new JMSReader(settings)
} 
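A minimal usage sketch, assuming a fully populated JMSSettings value named `settings`:

val reader = JMSReader(settings)   // connects and builds the queue/topic consumers
val polled: Vector[(Message, SourceRecord)] = reader.poll()
polled.foreach { case (msg, record) =>
  // hand `record` to Kafka Connect; `msg` is retained so it can be acknowledged after delivery
}
reader.stop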
Example 34
Source File: HiveSource.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.landoop.streamreactor.connect.hive.source

import com.landoop.streamreactor.connect.hive
import com.landoop.streamreactor.connect.hive._
import com.landoop.streamreactor.connect.hive.formats.{HiveFormat, HiveReader, Record}
import com.landoop.streamreactor.connect.hive.source.config.HiveSourceConfig
import com.landoop.streamreactor.connect.hive.source.mapper.{PartitionValueMapper, ProjectionMapper}
import com.landoop.streamreactor.connect.hive.source.offset.HiveSourceOffsetStorageReader
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.hive.metastore.IMetaStoreClient
import org.apache.kafka.connect.data.Struct
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._


class HiveSource(db: DatabaseName,
                 tableName: TableName,
                 topic: Topic,
                 offsetReader: HiveSourceOffsetStorageReader,
                 config: HiveSourceConfig)
                (implicit client: IMetaStoreClient, fs: FileSystem) extends Iterator[SourceRecord] {

  val tableConfig = config.tableOptions.filter(_.tableName == tableName).find(_.topic == topic)
    .getOrElse(sys.error(s"Cannot find table configuration for ${db.value}.${tableName.value} => ${topic.value}"))

  private val table = client.getTable(db.value, tableName.value)
  private val format = HiveFormat(hive.serde(table))
  private val metastoreSchema = HiveSchemas.toKafka(table)
  private val parts = TableFileScanner.scan(db, tableName)

  private val readers = parts.map { case (path, partition) =>

    val fns: Seq[Struct => Struct] = Seq(
      partition.map(new PartitionValueMapper(_).map _),
      tableConfig.projection.map(new ProjectionMapper(_).map _)
    ).flatten
    val mapper: Struct => Struct = Function.chain(fns)

    val sourceOffset = offsetReader.offset(SourcePartition(db, tableName, topic, path)).getOrElse(SourceOffset(0))

    new HiveReader {
      lazy val reader = format.reader(path, sourceOffset.rowNumber, metastoreSchema)
      override def iterator: Iterator[Record] = reader.iterator.map { record =>
        Record(mapper(record.struct), record.path, record.offset)
      }
      override def close(): Unit = reader.close()
    }
  }

  private val iterator: Iterator[Record] = readers.map(_.iterator).reduce(_ ++ _).take(tableConfig.limit)

  override def hasNext: Boolean = iterator.hasNext

  override def next(): SourceRecord = {

    val record = iterator.next
    val sourcePartition = SourcePartition(db, tableName, topic, record.path)
    val offset = SourceOffset(record.offset)

    new SourceRecord(
      fromSourcePartition(sourcePartition).asJava,
      fromSourceOffset(offset).asJava,
      topic.value,
      record.struct.schema,
      record.struct
    )
  }

  def close(): Unit = {
    readers.foreach(_.close())
  }
} 
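HiveSource is itself an Iterator[SourceRecord], so a caller just traverses it. A hedged sketch, assuming the implicit IMetaStoreClient and FileSystem plus an offsetReader and config are already in scope; the database, table and topic names are placeholders:

val source = new HiveSource(DatabaseName("mydb"), TableName("mytable"), Topic("my-topic"), offsetReader, config)
val records: Vector[SourceRecord] = source.toVector // reads the scanned files up to the configured limit
source.close()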
Example 35
Source File: ReThinkSourceReadersFactory.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.rethink.source

import java.util
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean

import com.datamountaineer.streamreactor.connect.rethink.ReThinkConnection
import com.datamountaineer.streamreactor.connect.rethink.config.{ReThinkSourceConfig, ReThinkSourceSetting, ReThinkSourceSettings}
import com.rethinkdb.RethinkDB
import com.rethinkdb.net.{Connection, Cursor}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.SchemaBuilder
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object ReThinkSourceReadersFactory {

  def apply(config: ReThinkSourceConfig, r: RethinkDB): Set[ReThinkSourceReader] = {
    val conn = Some(ReThinkConnection(r, config))
    val settings = ReThinkSourceSettings(config)
    settings.map(s => new ReThinkSourceReader(r, conn.get, s))
  }
}

class ReThinkSourceReader(rethink: RethinkDB, conn: Connection, setting: ReThinkSourceSetting)
  extends StrictLogging {

  logger.info(s"Initialising ReThink Reader for ${setting.source}")
  private val keySchema = SchemaBuilder.string().optional().build()
  private val valueSchema = ChangeFeedStructBuilder.schema
  private val sourcePartition = Map.empty[String, String]
  private val offset = Map.empty[String, String]
  private val stopFeed = new AtomicBoolean(false)
  private val handlingFeed = new AtomicBoolean(false)
  private var feed : Cursor[util.HashMap[String, String]] = _
  val queue = new LinkedBlockingQueue[SourceRecord]()
  val batchSize = setting.batchSize

  def start() = {
    feed = getChangeFeed()
    startFeed(feed)
  }

  def stop() = {
    logger.info(s"Closing change feed for ${setting.source}")
    stopFeed.set(true)
    while (handlingFeed.get()) {
      logger.debug("Waiting for feed to shutdown...")
      Thread.sleep(1000)
    }
    feed.close()
    logger.info(s"Change feed closed for ${setting.source}")
  }

  
  private def handleFeed(feed: Cursor[util.HashMap[String, String]]) = {
    handlingFeed.set(true)

    //feed.next is blocking
    while(!stopFeed.get()) {
      logger.debug(s"Waiting for next change feed event for ${setting.source}")
      val cdc = convert(feed.next().asScala.toMap)
      queue.put(cdc)
    }
    handlingFeed.set(false)
  }

  private def getChangeFeed(): Cursor[util.HashMap[String, String]] = {
    logger.info(s"Initialising change feed for ${setting.source}")
    rethink
      .db(setting.db)
      .table(setting.source)
      .changes()
      .optArg("include_states", true)
      .optArg("include_initial", setting.initialise)
      .optArg("include_types", true)
      .run(conn)
  }

  private def convert(feed: Map[String, String]) = {
    new SourceRecord(sourcePartition.asJava, offset.asJava, setting.target, keySchema, setting.source, valueSchema,
      ChangeFeedStructBuilder(feed))
  }
} 
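The reader only fills its internal queue; the owning task is expected to drain it on each poll. A hedged sketch, assuming `readers` is the set returned by ReThinkSourceReadersFactory(config, r):

import java.util

val records = new util.ArrayList[SourceRecord]()
readers.foreach(reader => reader.queue.drainTo(records, reader.batchSize))
// `records` can now be returned from SourceTask.poll()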
Example 36
Source File: PulsarSourceTask.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.pulsar.source

import java.util
import java.util.UUID

import com.datamountaineer.streamreactor.connect.converters.source.Converter
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSourceConfig, PulsarSourceSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.source.{SourceRecord, SourceTask}
import org.apache.pulsar.client.api.{ClientConfiguration, PulsarClient}
import org.apache.pulsar.client.impl.auth.AuthenticationTls
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

class PulsarSourceTask extends SourceTask with StrictLogging {
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private var pulsarManager: Option[PulsarManager] = None
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def start(props: util.Map[String, String]): Unit = {

    logger.info(scala.io.Source.fromInputStream(this.getClass.getResourceAsStream("/pulsar-source-ascii.txt")).mkString + s" $version")
    logger.info(manifest.printManifest())

    val conf = if (context.configs().isEmpty) props else context.configs()

    implicit val settings = PulsarSourceSettings(PulsarSourceConfig(conf), props.getOrDefault("tasks.max", "1").toInt)


    val name = conf.getOrDefault("name", s"kafka-connect-pulsar-source-${UUID.randomUUID().toString}")
    val convertersMap = buildConvertersMap(conf, settings)

    val messageConverter = PulsarMessageConverter(
      convertersMap,
      settings.kcql,
      settings.throwOnConversion,
      settings.pollingTimeout,
      settings.batchSize)

    val clientConf = new ClientConfiguration()

    settings.sslCACertFile.foreach(f => {
      clientConf.setUseTls(true)
      clientConf.setTlsTrustCertsFilePath(f)

      val authParams = settings.sslCertFile.map(f => ("tlsCertFile", f)).toMap ++ settings.sslCertKeyFile.map(f => ("tlsKeyFile", f)).toMap
      clientConf.setAuthentication(classOf[AuthenticationTls].getName, authParams.asJava)
    })

    pulsarManager = Some(new PulsarManager(PulsarClient.create(settings.connection, clientConf), name, settings.kcql, messageConverter))
    enableProgress = settings.enableProgress
  }

  def buildConvertersMap(props: util.Map[String, String], settings: PulsarSourceSettings): Map[String, Converter] = {
    settings.sourcesToConverters.map { case (topic, clazz) =>
      logger.info(s"Creating converter instance for $clazz")
      val converter = Try(Class.forName(clazz).newInstance()) match {
        case Success(value) => value.asInstanceOf[Converter]
        case Failure(_) => throw new ConfigException(s"${PulsarConfigConstants.KCQL_CONFIG} is invalid. $clazz should have an empty ctor!")
      }
      import scala.collection.JavaConverters._
      converter.initialize(props.asScala.toMap)
      topic -> converter
    }
  }

  
  override def stop(): Unit = {
    logger.info("Stopping Pulsar source.")
    pulsarManager.foreach(_.close())
    progressCounter.empty
  }

  override def version: String = manifest.version()
} 
Example 37
Source File: PulsarMessageConverterTest.scala    From stream-reactor   with Apache License 2.0 5 votes vote down vote up
package com.datamountaineer.streamreactor.connect.pulsar.source

import java.util

import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSourceConfig, PulsarSourceSettings}
import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil
import org.apache.kafka.connect.source.SourceRecord
import org.apache.pulsar.client.api.MessageBuilder
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._


class PulsarMessageConverterTest extends AnyWordSpec with Matchers with ConverterUtil {

  val pulsarTopic = "persistent://landoop/standalone/connect/kafka-topic"
  val jsonMessage = "{\"int8\":12,\"int16\":12,\"int32\":12,\"int64\":12,\"float32\":12.2,\"float64\":12.2,\"boolean\":true,\"string\":\"foo\"}"

  "should convert messages" in {
    val props =  Map(
      PulsarConfigConstants.HOSTS_CONFIG -> "pulsar://localhost:6650",
      PulsarConfigConstants.KCQL_CONFIG -> s"INSERT INTO kafka_topic SELECT * FROM $pulsarTopic BATCH = 10",
      PulsarConfigConstants.THROW_ON_CONVERT_ERRORS_CONFIG -> "true",
      PulsarConfigConstants.POLLING_TIMEOUT_CONFIG -> "500"
    ).asJava

    val config = PulsarSourceConfig(props)
    val settings = PulsarSourceSettings(config, 1)

    // test part of the task here as well
    val task = new PulsarSourceTask()
    val convertersMap = task.buildConvertersMap(props, settings)

    val converter = PulsarMessageConverter(convertersMap, settings.kcql, false, 100, 100)

    val message = MessageBuilder
      .create
      .setContent(jsonMessage.getBytes)
      .setKey("landoop")
      .setSequenceId(1)
      .build()


    // pulsar message
    converter.convertMessages(message, pulsarTopic)

    val list = new util.ArrayList[SourceRecord]()
    converter.getRecords(list)
    list.size shouldBe 1
    val record = list.get(0)
    record.key().toString shouldBe "landoop"
    record.value().asInstanceOf[Array[Byte]].map(_.toChar).mkString shouldBe jsonMessage
  }
}