package org.apache.sqoop.hbase;

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.sqoop.mapreduce.ImportJobBase;

import com.cloudera.sqoop.lib.FieldMappable;
import com.cloudera.sqoop.lib.FieldMapProcessor;
import com.cloudera.sqoop.lib.ProcessingException;

/**
 * SqoopRecordProcessor that performs an HBase "put" operation
 * that contains all the fields of the record.
 */
public class HBasePutProcessor implements Closeable, Configurable,
    FieldMapProcessor {

  public static final Log LOG = LogFactory.getLog(

  /** Configuration key specifying the table to insert into. */
  public static final String TABLE_NAME_KEY = "sqoop.hbase.insert.table";

  /** Configuration key specifying the column family to insert into. */
  public static final String COL_FAMILY_KEY =

  /** Configuration key specifying the column of the input whose value
   * should be used as the row id.
   */
  public static final String ROW_KEY_COLUMN_KEY =

  /**
   * Configuration key specifying the PutTransformer implementation to use.
   */
  public static final String TRANSFORMER_CLASS_KEY =

  /**
   * Configuration key to enable/disable HBase bulk load.
   */
  public static final String BULK_LOAD_ENABLED_KEY =

  /** Configuration key to specify whether to add the row key column into
   *  HBase. Set to false by default.
   */
  public static final String ADD_ROW_KEY = "sqoop.hbase.add.row.key";
  // Default paired with ADD_ROW_KEY: the row key column is not added.
  public static final boolean ADD_ROW_KEY_DEFAULT = false;

  // Configuration handed in through setConf and returned by getConf.
  private Configuration conf;

  // An object that can transform a map of fieldName->object
  // into a Put command.
  private PutTransformer putTransformer;

  // Name of the target table, read from TABLE_NAME_KEY in setConf.
  private String tableName;
  // Handle to that table, created in setConf.
  private HTable table;

  // No-argument constructor. The transformer and the table handle are
  // created in setConf, which reads them from the Configuration.
  public HBasePutProcessor() {

  // Stores the configuration, instantiates and configures the
  // PutTransformer it names, and opens the target HBase table.
  // Failures surface as RuntimeException; no checked exception is declared.
  public void setConf(Configuration config) {
    this.conf = config;

    // Get the implementation of PutTransformer to use.
    // By default, we call toString() on every non-null field.
    Class<? extends PutTransformer> xformerClass =
        (Class<? extends PutTransformer>)
        this.conf.getClass(TRANSFORMER_CLASS_KEY, ToStringPutTransformer.class);
    this.putTransformer = (PutTransformer)
        ReflectionUtils.newInstance(xformerClass, this.conf);
    // NOTE(review): ReflectionUtils.newInstance appears to throw rather than
    // return null, so this guard may be unreachable -- confirm.
    if (null == putTransformer) {
      throw new RuntimeException("Could not instantiate PutTransformer.");

    // Both lookups fall back to null when the key is not configured.
    this.putTransformer.setColumnFamily(conf.get(COL_FAMILY_KEY, null));
    this.putTransformer.setRowKeyColumn(conf.get(ROW_KEY_COLUMN_KEY, null));

    // The default transformer has two extra options that are assigned
    // directly to its fields rather than through the PutTransformer API.
    if (this.putTransformer instanceof ToStringPutTransformer) {
      ToStringPutTransformer stringPutTransformer =
          (ToStringPutTransformer) this.putTransformer;
      stringPutTransformer.bigDecimalFormatString =
      stringPutTransformer.addRowKey =

    // NOTE(review): tableName is null when TABLE_NAME_KEY is unset and is
    // passed to HTable unchecked -- confirm callers always set the key.
    this.tableName = conf.get(TABLE_NAME_KEY, null);
    try {
      this.table = new HTable(conf, this.tableName);
    } catch (IOException ioe) {
      // Rethrown unchecked because this method declares no IOException.
      throw new RuntimeException("Could not access HBase table " + tableName,
  // Returns the Configuration stored by setConf.
  public Configuration getConf() {
    return this.conf;

  /**
   * Processes a record by extracting its field map and converting
   * it into a list of Put commands into HBase.
   */
  public void accept(FieldMappable record)
      throws IOException, ProcessingException {
    Map<String, Object> fields = record.getFieldMap();

    // One record can yield several Puts; a null list is treated as
    // nothing to do.
    List<Put> putList = putTransformer.getPutCommand(fields);
    if (null != putList) {
      for (Put put : putList) {
        // Null entries inside the list are skipped.
        if (put!=null) {
          if (put.isEmpty()) {
            // A Put carrying no columns is only reported, together with
            // its row key, in this branch.
            LOG.warn("Could not insert row with no columns "
                + "for row-key column: " + Bytes.toString(put.getRow()));
          } else {
            // NOTE(review): the non-empty branch is not visible here;
            // presumably it hands the Put to the table -- confirm.
  /**
   * Closes the HBase table and commits all pending operations.
   */
  public void close() throws IOException {