package cn.hashdata.bireme.pipeline;

import java.util.Date;
import java.util.HashMap;
import java.util.Properties;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.InterruptException;

import com.codahale.metrics.Timer;

import cn.hashdata.bireme.AbstractCommitCallback;
import cn.hashdata.bireme.BiremeException;
import cn.hashdata.bireme.ChangeSet;
import cn.hashdata.bireme.CommitCallback;
import cn.hashdata.bireme.Context;
import cn.hashdata.bireme.Row;
import cn.hashdata.bireme.RowSet;

/**
 * {@code KafkaPipeLine} is a kind of {@code PipeLine} that polls data from Kafka.
 *
 * @author yuze
 */
public abstract class KafkaPipeLine extends PipeLine {
  // Upper bound, in milliseconds, on how long one consumer.poll() call may block.
  private static final long POLL_TIMEOUT = 100L;

  // Kafka client this pipeline reads from; assigned in the constructor via createConsumer().
  protected KafkaConsumer<String, String> consumer;
  // Callbacks of the change sets handed out by pollChangeSet(), in hand-out order.
  protected LinkedBlockingQueue<KafkaCommitCallback> commitCallbacks;

  /**
   * Creates the pipeline, opens its Kafka consumer and prepares the empty queue of pending commit
   * callbacks.
   *
   * @param cxt forwarded to the {@code PipeLine} constructor
   * @param conf forwarded to the {@code PipeLine} constructor; its {@code server} and
   *        {@code groupID} fields are also used to create the consumer
   * @param myName forwarded to the {@code PipeLine} constructor
   */
  public KafkaPipeLine(Context cxt, SourceConfig conf, String myName) {
    super(cxt, conf, myName);
    consumer = KafkaPipeLine.createConsumer(conf.server, conf.groupID);
    commitCallbacks = new LinkedBlockingQueue<>();
  }

  public ChangeSet pollChangeSet() throws BiremeException {
    ConsumerRecords<String, String> records = null;

    try {
      records = consumer.poll(POLL_TIMEOUT);
    } catch (InterruptException e) {

    if (cxt.stop || records == null || records.isEmpty()) {
      return null;

    KafkaCommitCallback callback = new KafkaCommitCallback();

    if (!commitCallbacks.offer(callback)) {
      String Message = "Can't add CommitCallback to queue.";
      throw new BiremeException(Message);


    return packRecords(records, callback);

  public void checkAndCommit() {
    CommitCallback callback = null;

    while (!commitCallbacks.isEmpty()) {
      if (commitCallbacks.peek().ready()) {
        callback = commitCallbacks.remove();
      } else {

    if (callback != null) {

  private ChangeSet packRecords(
      ConsumerRecords<String, String> records, KafkaCommitCallback callback) {
    ChangeSet changeSet = new ChangeSet();
    changeSet.createdAt = new Date();
    changeSet.changes = records;
    changeSet.callback = callback;

    return changeSet;

  /**
   * Loop through the {@code ChangeSet} and transform each change data into a {@code Row}.
   *
   * @author yuze
   */
  public abstract class KafkaTransformer extends Transformer {
    public void fillRowSet(RowSet rowSet) throws BiremeException {
      CommitCallback callback = changeSet.callback;
      HashMap<String, Long> offsets = ((KafkaCommitCallback) callback).partitionOffset;
      Row row = null;

      for (ConsumerRecord<String, String> change :
          (ConsumerRecords<String, String>) changeSet.changes) {
        row = new Row();

        if (!transform(change, row)) {

        addToRowSet(row, rowSet);
        offsets.put(change.topic() + "+" + change.partition(), change.offset());

      rowSet.callback = callback;

     * Transform the change data into a {@code Row}.
     * @param change the change data
     * @param row an empty {@code Row} to store the result.
     * @return {@code true} if transform the change data successfully, {@code false} it the change
     *         data is null or filtered
     * @throws BiremeException when can not get the field
    public abstract boolean transform(ConsumerRecord<String, String> change, Row row)
        throws BiremeException;

  /**
   * {@code KafkaCommitCallback} is used to trace a {@code ChangeSet} polled from Kafka. After the
   * change data has been applied, commit the offset to Kafka.
   *
   * @author yuze
   */
  public class KafkaCommitCallback extends AbstractCommitCallback {
    public HashMap<String, Long> partitionOffset;
    private Timer.Context timerCTX;
    private Date start;

    public KafkaCommitCallback() {
      this.partitionOffset = new HashMap<String, Long>();

      // record the time being created
      timerCTX = stat.avgDelay.time();
      start = new Date();

    public void commit() {
      HashMap<TopicPartition, OffsetAndMetadata> offsets =
          new HashMap<TopicPartition, OffsetAndMetadata>();

      partitionOffset.forEach((key, value) -> {
        String topic = key.split("\\+")[0];
        int partition = Integer.valueOf(key.split("\\+")[1]);
        offsets.put(new TopicPartition(topic, partition), new OffsetAndMetadata(value + 1));


      // record the time being committed

      stat.newestCompleted = newestRecord;
      stat.delay = new Date().getTime() - start.getTime();

    public void destory() {
      partitionOffset = null;
      timerCTX = null;
      start = null;

  /**
   * Create a new KafkaConsumer, specify the server's ip and port, and groupID.
   *
   * @param server ip and port for Kafka server
   * @param groupID consumer's group id
   * @return the consumer
   */
  public static KafkaConsumer<String, String> createConsumer(String server, String groupID) {
    Properties props = new Properties();
    props.put("bootstrap.servers", server);
    props.put("", groupID);
    props.put("", false);
    props.put("", 60000);
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("auto.offset.reset", "earliest");
    return new KafkaConsumer<String, String>(props);