org.apache.hadoop.hbase.client.Scan Scala Examples

The following examples show how to use org.apache.hadoop.hbase.client.Scan. Each example is taken from an open-source project; the source file, project name, and license are listed in the header above the code.
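
Before the project examples, here is a minimal, hypothetical sketch of the typical Scan lifecycle: configure the scan, hand it to a Table, and iterate the ResultScanner. The table name "demo", column family "cf", and qualifier "q" are placeholders, not taken from any example below.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.JavaConverters._

object ScanSketch extends App {
  val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
  try {
    val table = connection.getTable(TableName.valueOf("demo")) // placeholder table name
    val scan = new Scan()
    scan.setCaching(100)                                       // rows fetched per RPC
    scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"))    // restrict the projection
    val scanner = table.getScanner(scan)
    try {
      scanner.asScala.foreach(result => println(Bytes.toString(result.getRow)))
    } finally scanner.close()
  } finally connection.close()
}
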
Example 1
Source File: HBaseServiceLayer.scala    From Taxi360   with Apache License 2.0
package com.hadooparchitecturebook.taxi360.server.hbase

import javax.ws.rs._
import javax.ws.rs.core.MediaType

import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTrip
import com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

@Path("rest")
class HBaseServiceLayer {

  @GET
  @Path("hello")
  @Produces(Array(MediaType.TEXT_PLAIN))
  def hello(): String = {
    "Hello World"
  }

  @GET
  @Path("vender/{venderId}/timeline")
  @Produces(Array(MediaType.APPLICATION_JSON))
  def getTripTimeLine (@PathParam("venderId") venderId:String,
                          @QueryParam("startTime") startTime:String = Long.MinValue.toString,
                          @QueryParam("endTime")  endTime:String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = {

    val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName))

    // JAX-RS injects null when a query parameter is absent, so default
    // to an unbounded time range rather than relying on parameter defaults.
    val st = if (startTime == null) {
      Long.MinValue.toString
    } else {
      startTime
    }
    val et = if (endTime == null) {
      Long.MaxValue.toString
    } else {
      endTime
    }

    val scan = new Scan()
    val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts)
    println("startRowKey:" + Bytes.toString(startRowKey))
    scan.setStartRow(startRowKey)
    val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts)
    println("endRowKey:" + Bytes.toString(endRowKey))
    scan.setStopRow(endRowKey)

    val scannerIt = table.getScanner(scan).iterator()

    val tripList = new mutable.MutableList[NyTaxiYellowTrip]

    while (scannerIt.hasNext) {
      val result = scannerIt.next()
      // Convert each Result once and reuse it, instead of converting twice.
      val trip = TaxiTripHBaseHelper.convertToTaxiTrip(result)
      tripList += trip
      println("Found a trip:" + trip)
    }

    println("tripList.size:" + tripList.size)

    tripList.toArray
  }

} 
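
Note that setStartRow and setStopRow used above are deprecated in newer HBase client versions in favour of the fluent withStartRow/withStopRow. A minimal sketch of the replacement, assuming the same row-key helpers as the example:

// Sketch only: startRowKey/endRowKey are computed exactly as in the example above.
def rangeScan(startRowKey: Array[Byte], endRowKey: Array[Byte]): Scan =
  new Scan()
    .withStartRow(startRowKey) // inclusive by default
    .withStopRow(endRowKey)    // exclusive by default
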
Example 2
Source File: HbaseScanner.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.hbase

import io.eels.component.hbase.HbaseScanner.copy
import io.eels.schema.StructType
import org.apache.hadoop.hbase.client.Scan


object HbaseScanner {

  def apply(schema: StructType, hbaseSource: HbaseSource)(implicit serializer: HbaseSerializer): Scan = {
    val scan = new Scan
    val keyField = schema.fields.find(_.key).getOrElse(sys.error("HBase requires a single column to be defined as a key"))
    hbaseSource.cacheBlocks.filterNot(_ == None).map(scan.setCacheBlocks)
    hbaseSource.caching.filterNot(_ == None).map(scan.setCaching)
    hbaseSource.batch.filterNot(_ == None).map(scan.setBatch)
    hbaseSource.startKey.filterNot(_ == None).map(startKey => scan.withStartRow(copy(serializer.toBytes(startKey, keyField.name, keyField.dataType))))
    hbaseSource.stopKey.map { key =>
      val stopKey = copy(serializer.toBytes(key, keyField.name, keyField.dataType))
      // If the stop key is marked as inclusive then increment the last byte by one - not fully tested
      if (hbaseSource.stopKeyInclusive) {
        val lastByteIncremented = (stopKey.last.toShort + 1).toByte
        stopKey(stopKey.length - 1) = if (lastByteIncremented > stopKey.last) lastByteIncremented else stopKey.last
      }
      scan.withStopRow(stopKey)
    }
    hbaseSource.consistency.filterNot(_ == None).map(scan.setConsistency)
    hbaseSource.isolationLevel.filterNot(_ == None).map(scan.setIsolationLevel)
    hbaseSource.timeRange.filterNot(_ == None).map(t => scan.setTimeRange(t._1, t._2))
    hbaseSource.timeStamp.filterNot(_ == None).map(scan.setTimeStamp)
    hbaseSource.maxVersions.filterNot(_ == None).map(scan.setMaxVersions)
    hbaseSource.maxResultsPerColumnFamily.filterNot(_ == None).map(scan.setMaxResultsPerColumnFamily)
    hbaseSource.rowOffsetPerColumnFamily.filterNot(_ == None).map(scan.setRowOffsetPerColumnFamily)
    hbaseSource.maxResultSize.filterNot(_ == None).map(scan.setMaxResultSize)
    hbaseSource.reverseScan.filterNot(_ == None).map(scan.setReversed)
    hbaseSource.allowPartialResults.filterNot(_ == None).map(scan.setAllowPartialResults)
    hbaseSource.loadColumnFamiliesOnDemand.filterNot(_ == None).map(scan.setLoadColumnFamiliesOnDemand)
    hbaseSource.returnDeletedRows.filterNot(_ == None).map(scan.setRaw)
    hbaseSource.identifier.filterNot(_ == None).map(scan.setId)
    hbaseSource.rowPrefixFilter.filterNot(_ == None).map(scan.setRowPrefixFilter)

    // Setup predicate push downs
    hbaseSource.filterList.filterNot(_ == None).map(scan.setFilter)

    // Set up column projection schema
    schema.fields
      .filter(!_.key)
      .foreach(f => scan.addColumn(f.columnFamily.get.getBytes, f.name.getBytes))

    scan
  }

  private def copy(sourceArray: Array[Byte]): Array[Byte] = {
    val bufferCopy = new Array[Byte](sourceArray.length)
    System.arraycopy(sourceArray, 0, bufferCopy, 0, bufferCopy.length)
    bufferCopy
  }
} 
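
A side note on the pattern above: each setting is an Option, so filterNot(_ == None) is effectively a no-op on an already-present value, and the result of map is discarded. If the intent is simply "apply the setter when the option is defined", Option.foreach states it directly. A hypothetical rewrite of a few of the settings, mirroring the HbaseSource fields above:

// Sketch only: equivalent behaviour with foreach instead of filterNot/map.
hbaseSource.caching.foreach(scan.setCaching)
hbaseSource.cacheBlocks.foreach(scan.setCacheBlocks)
hbaseSource.timeRange.foreach { case (min, max) => scan.setTimeRange(min, max) }
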
Example 3
Source File: HbasePublisher.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.hbase

import java.util
import java.util.concurrent.atomic.AtomicBoolean

import com.sksamuel.exts.io.Using
import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.datastream.{Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Result, Scan}

import scala.collection.mutable.ArrayBuffer

class HbasePublisher(connection: Connection,
                     schema: StructType,
                     namespace: String,
                     tableName: String,
                     bufferSize: Int,
                     maxRows: Long,
                     scanner: Scan,
                     implicit val serializer: HbaseSerializer) extends Publisher[Seq[Row]] with Timed with Using {

  private val table = connection.getTable(TableName.valueOf(namespace, tableName))

  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    try {
      using(new CloseableIterator) { rowIter =>
        val running = new AtomicBoolean(true)
        subscriber.subscribed(Subscription.fromRunning(running))
        val buffer = new ArrayBuffer[Row](bufferSize)
        while (rowIter.hasNext && running.get()) {
          buffer append rowIter.next()
          if (buffer.size == bufferSize) {
            subscriber.next(buffer.toVector)
            buffer.clear()
          }
        }
        if (buffer.nonEmpty) subscriber.next(buffer.toVector)
        subscriber.completed()
      }
    } catch {
      case t: Throwable => subscriber.error(t)
    }
  }

  class CloseableIterator extends Iterator[Row] with AutoCloseable {
    private val resultScanner = table.getScanner(scanner)
    private val resultScannerIter = resultScanner.iterator()
    private var rowCount = 0
    private var iter: Iterator[Row] = Iterator.empty

    override def hasNext: Boolean = rowCount < maxRows && iter.hasNext || {
      if (rowCount < maxRows && resultScannerIter.hasNext) {
        iter = HBaseResultsIterator(schema, resultScannerIter)
        iter.hasNext
      } else false
    }

    override def next(): Row = {
      rowCount += 1
      iter.next()
    }

    override def close(): Unit = {
      resultScanner.close()
    }
  }

  case class HBaseResultsIterator(schema: StructType, resultIter: util.Iterator[Result])(implicit serializer: HbaseSerializer) extends Iterator[Row] {
    override def hasNext: Boolean = resultIter.hasNext

    override def next(): Row = {
      val resultRow = resultIter.next()
      val values = schema.fields.map { field =>
        if (!field.key) {
          val value = resultRow.getValue(field.columnFamily.getOrElse(sys.error(s"No Column Family defined for field '${field.name}'")).getBytes, field.name.getBytes)
          if (value != null) serializer.fromBytes(value, field.name, field.dataType) else null
        } else serializer.fromBytes(resultRow.getRow, field.name, field.dataType)
      }
      Row(schema, values)
    }
  }


} 
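
The buffering loop in subscribe can also be expressed with Iterator.grouped, which chunks any iterator into fixed-size batches. A standalone sketch of the same batch-and-publish idea in plain Scala (not the eel-sdk API):

// Plain-Scala sketch of the pattern used in subscribe above.
def publishInBatches[A](rows: Iterator[A], batchSize: Int)(emit: Seq[A] => Unit): Unit =
  rows.grouped(batchSize).foreach(batch => emit(batch.toVector))
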
Example 4
Source File: HBaseDistributedScanExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Scan
import org.apache.spark.SparkConf

object HBaseDistributedScanExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("GenerateGraphs {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName )
    val sc = new SparkContext(sparkConf)

    try {
      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val scan = new Scan()
      scan.setCaching(100)

      val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)

      getRdd.foreach(v => println(Bytes.toString(v._1.get())))

      println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length);

        //.collect().foreach(v => println(Bytes.toString(v._1.get())))
    } finally {
      sc.stop()
    }
  }

} 
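
getRdd here is an RDD of (ImmutableBytesWritable, Result) pairs, so beyond printing row keys, a typical next step pulls a column out of each Result. A hedged sketch, where the family "cf" and qualifier "col" are placeholders:

// Hypothetical projection over the scan results; "cf"/"col" are placeholder names.
val values = getRdd.map { case (_, result) =>
  Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("col")))
}
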
Example 5
Source File: HogHBaseReputation.scala    From hogzilla   with GNU General Public License v2.0
package org.hogzilla.hbase

import java.util.ArrayList

import org.apache.hadoop.hbase.client.{Put, Scan}
import org.apache.hadoop.hbase.filter.{BinaryComparator, CompareFilter, Filter, FilterList, SingleColumnValueFilter}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable.HashSet


object HogHBaseReputation {

  // Ex: MX, whitelist
  def getReputationList(listName: String, listType: String): Set[String] = {
    val list = new HashSet[String]

    // Keep only rows whose rep:list_type and rep:list columns match the requested values
    val filters: ArrayList[Filter] = new ArrayList()

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false)

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false)

    filters.add(colValFilter1)
    filters.add(colValFilter2)

    val filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters)
    val scan = new Scan()
    scan.setFilter(filterList)

    val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator()

    while (it.hasNext()) {
      list.add(Bytes.toString(it.next().getValue(Bytes.toBytes("rep"), Bytes.toBytes("ip"))))
    }

    list.toSet
  }

  def saveReputationList(listName: String, listType: String, ip: String) = {
    val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }

}
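
FilterList.Operator.MUST_PASS_ALL combines the two SingleColumnValueFilters as a logical AND, so a row is returned only when both its list_type and list columns match. A hypothetical call site, assuming HogHBaseRDD.hogzilla_reputation has been initialized:

// Hypothetical usage; the list name and type mirror the "Ex: MX, whitelist" comment above.
val whitelistedIps: Set[String] = HogHBaseReputation.getReputationList("MX", "whitelist")
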
Example 6
Source File: HBaseDistributedScanExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseDistributedScanExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseDistributedScanExample {tableName} missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName )
    val sc = new SparkContext(sparkConf)

    try {
      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val scan = new Scan()
      scan.setCaching(100)

      val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)

      getRdd.foreach(v => println(Bytes.toString(v._1.get())))

      println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length);
    } finally {
      sc.stop()
    }
  }

} 
Example 7
Source File: HbaseReaderHelper.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.hbase.writers

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import com.datamountaineer.streamreactor.connect.hbase.HbaseHelper
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}

import scala.collection.JavaConverters._

object HbaseReaderHelper {
  def createConnection: Connection = {
    ConnectionFactory.createConnection(HBaseConfiguration.create())
  }

  def getAllRecords(tableName: String, columnFamily: String)(implicit connection: Connection): List[HbaseRowData] = {
    HbaseHelper.withTable(TableName.valueOf(tableName)) { tbl =>
      val scan = new Scan()
      scan.addFamily(columnFamily.fromString())
      val scanner = tbl.getScanner(scan)
      scanner.asScala.map { rs =>
        val cells = rs.rawCells().map { cell =>
          Bytes.toString(CellUtil.cloneQualifier(cell)) -> CellUtil.cloneValue(cell)
        }.toMap
        HbaseRowData(rs.getRow, cells)
      }.toList
    }
  }

}

case class HbaseRowData(key: Array[Byte], cells: Map[String, Array[Byte]]) 
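
A hypothetical usage of the helper, assuming the imports from the example; the table name "events" and column family "d" are placeholders:

// Sketch only: table and family names are placeholders.
implicit val connection: Connection = HbaseReaderHelper.createConnection
try {
  val rows: List[HbaseRowData] = HbaseReaderHelper.getAllRecords("events", "d")
  rows.foreach(r => println(Bytes.toString(r.key) + " -> " + r.cells.size + " cells"))
} finally connection.close()
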
Example 8
Source File: SHC.scala    From shc   with Apache License 2.0
package org.apache.spark.sql

import java.io.File

import com.google.common.io.Files
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseTestingUtility}
import org.apache.hadoop.hbase.client.{Scan, Put, ConnectionFactory, Table}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.sql.types.UTF8String
import org.apache.spark.{SparkContext, SparkConf, Logging}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
import scala.collection.JavaConverters._

class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging {
  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args: _*))
    }
  }


  private[spark] var htu = HBaseTestingUtility.createLocalHTU()
  private[spark] def tableName = "table1"

  private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"}
  var table: Table = null
  val conf = new SparkConf
  conf.set(SparkHBaseConf.testConf, "true")
  SparkHBaseConf.conf = htu.getConfiguration
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  def catalog = s"""{
            |"table":{"namespace":"default", "name":"table1"},
            |"rowkey":"key",
            |"columns":{
              |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
              |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
              |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
              |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
              |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
              |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
              |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
              |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
              |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
            |}
          |}""".stripMargin

  override def beforeAll() {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.cleanupTestDir
    htu.startMiniZKCluster
    htu.startMiniHBaseCluster(1, 4)
    logInfo(" - minicluster started")
    println(" - minicluster started")

  }

  override def afterAll() {
    try {
      table.close()
      println("shutdown")
      htu.deleteTable(TableName.valueOf(tableName))
      logInfo("shuting down minicluster")
      htu.shutdownMiniHBaseCluster
      htu.shutdownMiniZKCluster
      logInfo(" - minicluster shut down")
      htu.cleanupTestDir
    } catch {
      case _: Throwable => logError("teardown error")
    }
  }

  def createTable(name: String, cfs: Array[String]) {
    val tName = Bytes.toBytes(name)
    val bcfs = cfs.map(Bytes.toBytes(_))
    try {
      htu.deleteTable(TableName.valueOf(tName))
    } catch {
      case _: Throwable =>
        logInfo(" - no table " + name + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(tName), bcfs)
  }


  def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) {
    try {
      htu.deleteTable(TableName.valueOf(name))
    } catch {
      case _: Throwable =>
        logInfo(" - no table " + Bytes.toString(name) + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(name), cfs)
  }
} 
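
The catalog string above is what SHC's DataFrame reader consumes. A hedged sketch of how a test in this suite might read the table back through the connector; the option key and format class follow SHC's documented usage, but treat the exact names as an assumption:

// Sketch only, inside the suite above (package org.apache.spark.sql).
import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog

def readTable(sqlContext: SQLContext): DataFrame =
  sqlContext.read
    .options(Map(HBaseTableCatalog.tableCatalog -> catalog))
    .format("org.apache.spark.sql.execution.datasources.hbase")
    .load()
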
Example 9
Source File: HBaseServiceLayer.scala    From Taxi360   with Apache License 2.0
package com.cloudera.sa.taxi360.server.hbase

import javax.ws.rs._
import javax.ws.rs.core.MediaType

import com.cloudera.sa.taxi360.model.NyTaxiYellowTrip
import com.cloudera.sa.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

@Path("rest")
class HBaseServiceLayer {

  @GET
  @Path("hello")
  @Produces(Array(MediaType.TEXT_PLAIN))
  def hello(): String = {
    "Hello World"
  }

  @GET
  @Path("vender/{venderId}/timeline")
  @Produces(Array(MediaType.APPLICATION_JSON))
  def getTripTimeLine (@PathParam("venderId") venderId:String,
                          @QueryParam("startTime") startTime:String = Long.MinValue.toString,
                          @QueryParam("endTime")  endTime:String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = {

    val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName))

    // JAX-RS injects null when a query parameter is absent, so default
    // to an unbounded time range rather than relying on parameter defaults.
    val st = if (startTime == null) {
      Long.MinValue.toString
    } else {
      startTime
    }
    val et = if (endTime == null) {
      Long.MaxValue.toString
    } else {
      endTime
    }

    val scan = new Scan()
    val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts)
    println("startRowKey:" + Bytes.toString(startRowKey))
    scan.setStartRow(startRowKey)
    val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts)
    println("endRowKey:" + Bytes.toString(endRowKey))
    scan.setStopRow(endRowKey)

    val scannerIt = table.getScanner(scan).iterator()

    val tripList = new mutable.MutableList[NyTaxiYellowTrip]

    while (scannerIt.hasNext) {
      val result = scannerIt.next()
      // Convert each Result once and reuse it, instead of converting twice.
      val trip = TaxiTripHBaseHelper.convertToTaxiTrip(result)
      tripList += trip
      println("Found a trip:" + trip)
    }

    println("tripList.size:" + tripList.size)

    tripList.toArray
  }

}