org.apache.hadoop.hbase.client.Put Scala Examples

The following examples show how to use org.apache.hadoop.hbase.client.Put. Each example is drawn from an open-source project; the source file, project, and license are noted above each listing.
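Before the project examples, here is a minimal, self-contained sketch of the core Put workflow they all build on: construct a Put keyed by a row, attach one or more cells with addColumn (family, qualifier, value), and hand it to a Table obtained from a Connection. The table name "t1" and column family "cf" are illustrative assumptions, not taken from any project below.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes

object MinimalPutSketch {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val connection = ConnectionFactory.createConnection(conf)
    try {
      // Assumes a table "t1" with column family "cf" already exists.
      val table = connection.getTable(TableName.valueOf("t1"))
      val put = new Put(Bytes.toBytes("row1")) // row key
      // family, qualifier, value
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("v1"))
      table.put(put)
      table.close()
    } finally {
      connection.close()
    }
  }
}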
Example 1
Source File: HogHBaseReputation.scala    From hogzilla   with GNU General Public License v2.0
package org.hogzilla.hbase

import java.util.ArrayList

import scala.collection.mutable.HashSet

import org.apache.hadoop.hbase.client.{Put, Scan}
import org.apache.hadoop.hbase.filter.{BinaryComparator, CompareFilter, Filter, FilterList, SingleColumnValueFilter}
import org.apache.hadoop.hbase.util.Bytes


object HogHBaseReputation {

  // Ex: MX, whitelist
  def getReputationList(listName: String, listType: String): Set[String] = {
    val list = new HashSet[String]

    val filters: ArrayList[Filter] = new ArrayList()

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false)

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false)

    filters.add(colValFilter1)
    filters.add(colValFilter2)

    val filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters)
    val scan = new Scan()
    scan.setFilter(filterList)

    val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator()

    while (it.hasNext()) {
      list.add(Bytes.toString(it.next().getValue(Bytes.toBytes("rep"), Bytes.toBytes("ip"))))
    }

    list.toSet
  }
  
  def saveReputationList(listName: String, listType: String, ip: String) = {
    val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }

} 
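Note that hogzilla appears to target the pre-1.0 HBase client, where Put.add(family, qualifier, value) attached a cell. In HBase 1.0+ that overload is deprecated in favor of addColumn; a sketch of the equivalent, assuming the same "rep" family:

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes

// HBase 1.0+ equivalent of saveReputationList's put.add(...) calls:
def reputationPut(listName: String, listType: String, ip: String): Put = {
  val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
  put.addColumn(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
  put.addColumn(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
  put.addColumn(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))
  put
}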
Example 2
Source File: SHC.scala    From shc   with Apache License 2.0
package org.apache.spark.sql

import java.io.File

import com.google.common.io.Files
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseTestingUtility}
import org.apache.hadoop.hbase.client.{Scan, Put, ConnectionFactory, Table}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.sql.types.UTF8String
import org.apache.spark.{SparkContext, SparkConf, Logging}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}
import scala.collection.JavaConverters._

class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging {
  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args: _*))
    }
  }


  private[spark] var htu = HBaseTestingUtility.createLocalHTU()
  private[spark] def tableName = "table1"

  private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"}
  var table: Table = null
  val conf = new SparkConf
  conf.set(SparkHBaseConf.testConf, "true")
  SparkHBaseConf.conf = htu.getConfiguration
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  def catalog = s"""{
            |"table":{"namespace":"default", "name":"table1"},
            |"rowkey":"key",
            |"columns":{
              |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
              |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
              |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
              |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
              |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
              |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
              |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
              |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
              |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
            |}
          |}""".stripMargin

  override def beforeAll() {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.cleanupTestDir
    htu.startMiniZKCluster
    htu.startMiniHBaseCluster(1, 4)
    logInfo(" - minicluster started")
    println(" - minicluster started")

  }

  override def afterAll() {
    try {
      table.close()
      println("shutdown")
      htu.deleteTable(TableName.valueOf(tableName))
      logInfo("shuting down minicluster")
      htu.shutdownMiniHBaseCluster
      htu.shutdownMiniZKCluster
      logInfo(" - minicluster shut down")
      htu.cleanupTestDir
    } catch {
      case _: Throwable => logError("teardown error")
    }
  }

  def createTable(name: String, cfs: Array[String]) {
    val tName = Bytes.toBytes(name)
    val bcfs = cfs.map(Bytes.toBytes(_))
    try {
      htu.deleteTable(TableName.valueOf(tName))
    } catch {
      case _: Throwable =>
        logInfo(" - no table " + name + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(tName), bcfs)
  }


  def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) {
    try {
      htu.deleteTable(TableName.valueOf(name))
    } catch {
      case _: Throwable =>
        logInfo(" - no table " + Bytes.toString(name) + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(name), cfs)
  }
} 
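For context, the catalog string above is what the shc connector consumes when reading or writing DataFrames. A hedged sketch of typical usage along the lines of the shc README; the DataFrame df, the sqlContext, and the newTable region count are illustrative assumptions:

import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog

// Write a DataFrame through the shc data source using the catalog above.
// HBaseTableCatalog.newTable -> "5" asks for a new table with 5 regions (illustrative).
df.write
  .options(Map(HBaseTableCatalog.tableCatalog -> catalog, HBaseTableCatalog.newTable -> "5"))
  .format("org.apache.spark.sql.execution.datasources.hbase")
  .save()

// Read it back as a DataFrame.
val loaded = sqlContext.read
  .options(Map(HBaseTableCatalog.tableCatalog -> catalog))
  .format("org.apache.spark.sql.execution.datasources.hbase")
  .load()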
Example 3
Source File: HBaseForeachPartitionExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseForeachPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseForeachPartitionExample {tableName} {columnFamily} are missing an arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseForeachPartitionExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)


      rdd.hbaseForeachPartition(hbaseContext,
        (it, connection) => {
          val m = connection.getBufferedMutator(TableName.valueOf(tableName))

          it.foreach(r => {
            val put = new Put(r._1)
            r._2.foreach((putValue) =>
              put.addColumn(putValue._1, putValue._2, putValue._3))
            m.mutate(put)
          })
          m.flush()
          m.close()
        })

    } finally {
      sc.stop()
    }
  }
} 
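The BufferedMutator obtained inside hbaseForeachPartition batches mutations client-side, which is why the example calls m.flush() before m.close(). When the default buffer does not fit the workload, the client API also accepts a BufferedMutatorParams; a sketch, where the 4 MB figure is an arbitrary illustration:

import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{BufferedMutator, BufferedMutatorParams, Connection}

// writeBufferSize sets how many bytes accumulate before an automatic flush.
def mutatorWithBuffer(connection: Connection, tableName: String): BufferedMutator = {
  val params = new BufferedMutatorParams(TableName.valueOf(tableName))
    .writeBufferSize(4 * 1024 * 1024) // 4 MB, illustrative only
  connection.getBufferedMutator(params)
}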
Example 4
Source File: HBaseBulkPutExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseBulkPutExample {
   def main(args: Array[String]) {
     if (args.length < 2) {
       println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments")
       return
     }

     val tableName = args(0)
     val columnFamily = args(1)

     val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
       tableName + " " + columnFamily)
     val sc = new SparkContext(sparkConf)

     try {
       //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
       val rdd = sc.parallelize(Array(
         (Bytes.toBytes("1"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
         (Bytes.toBytes("2"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
         (Bytes.toBytes("3"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
         (Bytes.toBytes("4"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
         (Bytes.toBytes("5"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
       ))

       val conf = HBaseConfiguration.create()

       val hbaseContext = new HBaseContext(sc, conf)

       rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName),
         (putRecord) => {
           val put = new Put(putRecord._1)
           putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2,
             putValue._3))
           put
         })

     } finally {
       sc.stop()
     }
   }
 } 
Example 5
Source File: HBaseStreamingBulkPutExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseStreamingBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 4) {
      println("HBaseStreamingBulkPutExample " +
        "{host} {port} {tableName} {columnFamily} are missing an argument")
      return
    }

    val host = args(0)
    val port = args(1)
    val tableName = args(2)
    val columnFamily = args(3)

    val sparkConf = new SparkConf().setAppName("HBaseStreamingBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)
    try {
      val ssc = new StreamingContext(sc, Seconds(1))

      val lines = ssc.socketTextStream(host, port.toInt)

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.streamBulkPut[String](lines,
        TableName.valueOf(tableName),
        (putRecord) => {
          if (putRecord.length() > 0) {
            val put = new Put(Bytes.toBytes(putRecord))
            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
            put
          } else {
            null
          }
        })
      ssc.start()
      ssc.awaitTerminationOrTimeout(60000)
    } finally {
      sc.stop()
    }
  }
} 
Example 6
Source File: HBaseBulkPutExampleFromFile.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseBulkPutExampleFromFile {
  def main(args: Array[String]) {
    if (args.length < 3) {
      println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile} are missing an argument")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)
    val inputFile = args(2)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " +
      tableName + " " + columnFamily + " " + inputFile)
    val sc = new SparkContext(sparkConf)

    try {
      var rdd = sc.hadoopFile(
        inputFile,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]).map(v => {
        System.out.println("reading-" + v._2.toString)
        v._2.toString
      })

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[String](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          System.out.println("hbase-" + putRecord)
          val put = new Put(Bytes.toBytes("Value- " + putRecord))
          put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"),
            Bytes.toBytes(putRecord.length()))
          put
        });
    } finally {
      sc.stop()
    }
  }
} 
Example 7
Source File: HBaseBulkPutExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) =>
            put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        });
    } finally {
      sc.stop()
    }
  }
} 
Example 8
Source File: HBaseBulkPutTimestampExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseBulkPutTimestampExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily} are missing an argument")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {

      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("6"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("7"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("8"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("9"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("10"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))))

      val conf = HBaseConfiguration.create()

      val timeStamp = System.currentTimeMillis()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2,
            timeStamp, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
} 
Example 9
Source File: HBasePut.scala    From gimel   with Apache License 2.0
package com.paypal.gimel.hbase.utilities

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.{DataFrame, SparkSession}

import com.paypal.gimel.hbase.conf.{HbaseClientConfiguration, HbaseConfigs}
import com.paypal.gimel.logger.Logger

object HBasePut {

  def apply(sparkSession: SparkSession): HBasePut = new HBasePut(sparkSession)

}

class HBasePut(sparkSession: SparkSession) {
  val logger = Logger()
  lazy val hbaseUtilities = HBaseUtilities(sparkSession)

  
  def putRows(hbaseTable: String, dataFrame: DataFrame, rowKeyColumn: String, columns: Array[String], cfColsMap: Map[String, String]) {
    try {
      // Configure And Connect
      val conf = HBaseConfiguration.create()
      val cnxn = ConnectionFactory.createConnection(conf)
      // Create Connection to HBase table
      val tbl = cnxn.getTable(TableName.valueOf(hbaseTable))
      val rows = dataFrame.rdd.map { row =>
        (row.getAs(rowKeyColumn).toString,
          columns.map(eachCol => (cfColsMap.getOrElse(eachCol, ""), eachCol, row.getAs(eachCol).asInstanceOf[String]))
        )
      }.collect()
      // Performing put operation on each row of dataframe
      rows.foreach { row =>
        val putRow: Put = new Put(Bytes.toBytes(row._1.asInstanceOf[String]))
        row._2.foreach(x => if (x._2 != rowKeyColumn) putRow.addColumn(Bytes.toBytes(x._1), Bytes.toBytes(x._2), Bytes.toBytes(x._3)))
        tbl.put(putRow)
      }
      tbl.close()
      cnxn.close()
    } catch {
      case ex: Throwable =>
        ex.printStackTrace()
        throw ex
    }
  }
} 
Example 10
Source File: Hdfs2HBase.scala    From wow-spark   with MIT License
package com.sev7e0.wow.hbase

import java.util

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.spark.{SparkConf, SparkContext}

object Hdfs2HBase {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf()
      .setMaster("spark://spark01:7077")
      .setAppName(Hdfs2HBase.getClass.getName)
      .set("spark.jars", "target/wow-spark-1.0-SNAPSHOT.jar")

    val sparkContext = new SparkContext(conf)

    val userRDD = sparkContext.textFile("hdfs://spark01:9000/spark/users.dat",2).map(_.split("::"))

    userRDD.foreachPartition(iter =>{
      val configuration = HBaseConfiguration.create()
//      configuration.set("hbase.zookeeper.quorum","spark01:2181,spark02:2181,spark03:2181")
      configuration.set("hbase.zookeeper.quorum", "spark01")
      configuration.set("hbase.zookeeper.property.clientPort", "2181")
      // create the connection
      val connection = ConnectionFactory.createConnection(configuration)
      //get table object
      val person = connection.getTable(TableName.valueOf("users"))

      iter.foreach(p=>{
        val arrayList = new util.ArrayList[Put]()
        val put = new Put(p(0).getBytes)
        arrayList.add(put.addColumn("f1".getBytes,"gender".getBytes,p(1).getBytes))
        arrayList.add(put.addColumn("f1".getBytes,"age".getBytes,p(2).getBytes))
        arrayList.add(put.addColumn("f2".getBytes,"position".getBytes,p(3).getBytes))
        arrayList.add(put.addColumn("f2".getBytes,"code".getBytes,p(4).getBytes))
        person.put(arrayList)
      })
      person.close()
      connection.close()
    })
    sparkContext.stop()

  }

} 
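One rough edge in the loop above: it allocates a fresh ArrayList and issues one Table.put per input record, so the list buys no batching. Since Table.put also accepts a java.util.List[Put], the puts can be accumulated once per partition and written in a single call; a sketch of that reshaping, keeping the same row layout and column families:

import java.util
import org.apache.hadoop.hbase.client.{Put, Table}

// Batches all rows of one partition into a single Table.put call.
// `table` stands in for the `person` table above; the record layout matches users.dat.
def writePartition(iter: Iterator[Array[String]], table: Table): Unit = {
  val puts = new util.ArrayList[Put]()
  iter.foreach { p =>
    val put = new Put(p(0).getBytes)
    put.addColumn("f1".getBytes, "gender".getBytes, p(1).getBytes)
    put.addColumn("f1".getBytes, "age".getBytes, p(2).getBytes)
    put.addColumn("f2".getBytes, "position".getBytes, p(3).getBytes)
    put.addColumn("f2".getBytes, "code".getBytes, p(4).getBytes)
    puts.add(put)
  }
  table.put(puts) // one batched call per partition instead of one per row
}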
Example 11
Source File: HogEvent.scala    From hogzilla   with GNU General Public License v2.0
package org.hogzilla.event

import java.util.HashMap
import java.util.Map
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.hogzilla.hbase.HogHBaseRDD
import org.hogzilla.util.HogFlow
import java.net.InetAddress


class HogEvent(flow: HogFlow) {
  var sensorid: Int = 0
  var signature_id: Double = 0
  var priorityid: Int = 0
  var text: String = ""
  var data: Map[String, String] = new HashMap()
  var ports: String = ""
  var title: String = ""
  var username: String = ""
  var coords: String = ""

  def formatIPtoBytes(ip: String): Array[Byte] = {
    try {
      // Ugh! Snorby doesn't support IPv6 yet. See https://github.com/Snorby/snorby/issues/65
      if (ip.contains(":"))
        InetAddress.getByName("255.255.6.6").getAddress
      else
        InetAddress.getByName(ip).getAddress
    } catch {
      case t: Throwable =>
        // Bogus address!
        InetAddress.getByName("255.255.1.1").getAddress
    }
  }

  
  def alert() {
    val put = new Put(Bytes.toBytes(flow.get("flow:id")))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("note"), Bytes.toBytes(text))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip"), formatIPtoBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip"), formatIPtoBytes(flow.upper_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip_str"), Bytes.toBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip_str"), Bytes.toBytes(flow.upper_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("signature_id"), Bytes.toBytes("%.0f".format(signature_id)))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("time"), Bytes.toBytes(System.currentTimeMillis))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("ports"), Bytes.toBytes(ports))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("title"), Bytes.toBytes(title))

    if (!username.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("username"), Bytes.toBytes(username))
    if (!coords.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("coords"), Bytes.toBytes(coords))

    HogHBaseRDD.hogzilla_events.put(put)

    //println(f"ALERT: $text%100s\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
  }
} 
Example 12
Source File: HogSignature.scala    From hogzilla   with GNU General Public License v2.0
package org.hogzilla.event

import org.hogzilla.hbase.HogHBaseRDD
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Put




case class HogSignature(signature_class:Int, signature_name:String, signature_priority:Int, signature_revision:Int, signature_id:Double,signature_group_id:Int) {
  //Example: 3,"HZ: Suspicious DNS flow identified by K-Means clustering",2,1,826000001,826
  
  def saveHBase():HogSignature =
  {
    val get = new Get(Bytes.toBytes("%.0f".format(signature_id)))
    
    if(!HogHBaseRDD.hogzilla_signatures.exists(get))
    {
      val put = new Put(Bytes.toBytes("%.0f".format(signature_id)))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("id"), Bytes.toBytes("%.0f".format(signature_id)))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("class"), Bytes.toBytes(signature_class.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("name"), Bytes.toBytes(signature_name))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("priority"), Bytes.toBytes(signature_priority.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("revision"), Bytes.toBytes(signature_revision.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("group_id"), Bytes.toBytes(signature_group_id.toString()))
      HogHBaseRDD.hogzilla_signatures.put(put)
    }
    
    this
  }
} 
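The exists-then-put sequence in saveHBase is not atomic: two writers can both see a missing row and both insert. Where that matters, the classic client offers checkAndPut, which applies the Put only when a checked cell matches an expected value (null meaning "only if absent"). A sketch against the old HTableInterface API; the `table` parameter stands in for the signatures table and the names are illustrative:

import org.apache.hadoop.hbase.client.{HTableInterface, Put}
import org.apache.hadoop.hbase.util.Bytes

// Atomic "insert if absent": returns true if the put was applied.
def saveIfAbsent(table: HTableInterface, signatureId: Double): Boolean = {
  val row = Bytes.toBytes("%.0f".format(signatureId))
  val put = new Put(row)
  put.add(Bytes.toBytes("signature"), Bytes.toBytes("id"), row)
  // null expected value = proceed only if the cell does not exist yet
  table.checkAndPut(row, Bytes.toBytes("signature"), Bytes.toBytes("id"), null, put)
}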
Example 13
Source File: HbRddWriter.scala    From hbrdd   with Apache License 2.0
package top.spoofer.hbrdd.hbsupport

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.spark.rdd.RDD
import top.spoofer.hbrdd.config.HbRddConfig
import top.spoofer.hbrdd.unit.HbRddFormatsWriter
import top.spoofer.hbrdd._
import HbRddWritPuter._

trait HbRddWriter {
  type TsValue[A] = (Long, A) // (ts, A)
  val LATEST_TIMESTAMP = Long.MaxValue
}

final class SingleFamilyRDDWriter[A](
    val rdd: RDD[(String, Map[String, A])],
    val put: HbRddPuter[A]
) extends HbRddWritCommon[A] with Serializable {
  def put2Hbase(tableName: String, family: String)(implicit config: HbRddConfig) = {
    val job = createJob(tableName, config.getHbaseConfig)
    rdd.flatMap({ case (rowId, data) => convert2Writable(rowId, Map(family -> data), put) })
      .saveAsNewAPIHadoopDataset(job.getConfiguration)
  }
} 
Example 14
Source File: HogHBaseCluster.scala    From hogzilla   with GNU General Public License v2.0
package org.hogzilla.hbase

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Delete
import org.hogzilla.cluster.HogClusterMember


object HogHBaseCluster {

 def formatClusterTitle(clusterCentroid: List[(Long, Double)], clusterIdx: Int): String = {
   val mainTitle =
     "Group " + clusterIdx.toString + " - " +
       clusterCentroid
         .filter({ case (port, rate) => rate > 4.999 })
         .map({ case (port, rate) => port.toString() + ":" + "%.0f".format(rate) + "%" })
         .mkString(", ")

   val onePercentList =
     clusterCentroid
       .filter({ case (port, rate) => .9999 < rate && rate < 5 })

   if (onePercentList.nonEmpty) {
     mainTitle + ", " +
       onePercentList.map({ case (port, rate) => port.toString() }).mkString("(", ", ", ")" + "> 1%")
   } else {
     mainTitle
   }
 }
 
 def deleteCluster(clusterIdx:Int)=
 {
     val del = new Delete(Bytes.toBytes(clusterIdx.toString))
     HogHBaseRDD.hogzilla_clusters.delete(del)
 }
 
 
 def deleteClusterMember(memberIP:String)=
 {
     val del = new Delete(Bytes.toBytes(memberIP))
     HogHBaseRDD.hogzilla_cluster_members.delete(del)
 }
 
 def saveCluster(clusterIdx:Int, clusterCentroid:List[(Long,Double)], clusterSize: Long, members:Array[String]) = {
   
     val memberString = members.mkString(",")
   
     val put = new Put(Bytes.toBytes(clusterIdx.toString))
     put.add(Bytes.toBytes("info"), Bytes.toBytes("title"), Bytes.toBytes(formatClusterTitle(clusterCentroid,clusterIdx)))
     put.add(Bytes.toBytes("info"), Bytes.toBytes("size"), Bytes.toBytes(clusterSize.toString))
     put.add(Bytes.toBytes("info"), Bytes.toBytes("centroid"), Bytes.toBytes(clusterCentroid.mkString("[",",","]")))
     put.add(Bytes.toBytes("info"), Bytes.toBytes("members"), Bytes.toBytes(memberString))
     
     HogHBaseRDD.hogzilla_clusters.put(put)
  }
 
 def saveClusterMember(clusterMember:HogClusterMember) = {
   
     val put = new Put(Bytes.toBytes(clusterMember.memberIP.toString))
     put.add(Bytes.toBytes("info"),   Bytes.toBytes("title"),      Bytes.toBytes(clusterMember.formatTitle))
     put.add(Bytes.toBytes("cluster"),Bytes.toBytes("size"),       Bytes.toBytes(clusterMember.clusterSize.toString))
     put.add(Bytes.toBytes("cluster"),Bytes.toBytes("centroid"),   Bytes.toBytes(clusterMember.centroid.mkString("[",",","]")))
     put.add(Bytes.toBytes("cluster"),Bytes.toBytes("idx"),        Bytes.toBytes(clusterMember.clusterIdx.toString))
     put.add(Bytes.toBytes("cluster"),Bytes.toBytes("description"),Bytes.toBytes(formatClusterTitle(clusterMember.centroid,clusterMember.clusterIdx)))
     put.add(Bytes.toBytes("member"), Bytes.toBytes("ports"),      Bytes.toBytes("TCP: "+clusterMember.ports.mkString(""," ","")))
     put.add(Bytes.toBytes("member"), Bytes.toBytes("frequencies"),Bytes.toBytes("TCP: "+
                                                                           clusterMember.frequency_vector
                                                                           .filter({case (port,freq) => clusterMember.ports.contains(port)})
                                                                           .map({case (port,freq) => port.toString+"="+
                                                                                                     "%.0f".format(freq)+"%"
                                                                                })
                                                                           .mkString(""," ","")
                                                                          ))
     put.add(Bytes.toBytes("member"), Bytes.toBytes("ip"),         Bytes.toBytes(clusterMember.memberIP))
     put.add(Bytes.toBytes("member"), Bytes.toBytes("distance"),   Bytes.toBytes("%.2f".format(clusterMember.distance)))
     
     
     HogHBaseRDD.hogzilla_cluster_members.put(put)
  }
  

} 
Example 15
Source File: HBaseForeachPartitionExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkContext, SparkConf}


object HBaseForeachPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)


      rdd.hbaseForeachPartition(hbaseContext,
        (it, connection) => {
          val m = connection.getBufferedMutator(TableName.valueOf(tableName))

          it.foreach(r => {
            val put = new Put(r._1)
            r._2.foreach((putValue) =>
              put.addColumn(putValue._1, putValue._2, putValue._3))
            m.mutate(put)
          })
          m.flush()
          m.close()
        })

    } finally {
      sc.stop()
    }
  }
} 
Example 16
Source File: HBaseBulkPutExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.spark.{SparkConf, SparkContext}


object HBaseBulkPutExample {
   def main(args: Array[String]) {
     if (args.length < 2) {
       println("HBaseBulkPutExample {tableName} {columnFamily}")
       return
     }

     val tableName = args(0)
     val columnFamily = args(1)

     val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
       tableName + " " + columnFamily)
     val sc = new SparkContext(sparkConf)

     try {
       //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
       val rdd = sc.parallelize(Array(
         (Bytes.toBytes("1"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
         (Bytes.toBytes("2"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
         (Bytes.toBytes("3"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
         (Bytes.toBytes("4"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
         (Bytes.toBytes("5"),
           Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
       ))

       val conf = HBaseConfiguration.create()

       val hbaseContext = new HBaseContext(sc, conf)

       rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName),
         (putRecord) => {
           val put = new Put(putRecord._1)
           putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2,
             putValue._3))
           put
         })

     } finally {
       sc.stop()
     }
   }
 } 
Example 17
Source File: HBaseBulkPutExampleFromFile.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.spark.SparkConf


object HBaseBulkPutExampleFromFile {
  def main(args: Array[String]) {
    if (args.length < 3) {
      println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)
    val inputFile = args(2)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " +
      tableName + " " + columnFamily + " " + inputFile)
    val sc = new SparkContext(sparkConf)

    try {
      var rdd = sc.hadoopFile(
        inputFile,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]).map(v => {
        System.out.println("reading-" + v._2.toString)
        v._2.toString
      })

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[String](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          System.out.println("hbase-" + putRecord)
          val put = new Put(Bytes.toBytes("Value- " + putRecord))
          put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"),
            Bytes.toBytes(putRecord.length()))
          put
        });
    } finally {
      sc.stop()
    }
  }
} 
Example 18
Source File: HBaseBulkPutExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf


object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) =>
            put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        });
    } finally {
      sc.stop()
    }
  }
} 
Example 19
Source File: HBaseBulkPutTimestampExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf


object HBaseBulkPutTimestampExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {

      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("6"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("7"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("8"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("9"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("10"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))))

      val conf = HBaseConfiguration.create()

      val timeStamp = System.currentTimeMillis()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2,
            timeStamp, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
} 
Example 20
Source File: SparkApplicationTester.scala    From TopNotch   with Apache License 2.0
package com.bfm.topnotch

import org.scalatest.OneInstancePerTest
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.{HConnection, HTableInterface, Put}
import org.apache.spark._
import org.apache.spark.sql.SparkSession
import org.scalamock.scalatest.MockFactory
import org.scalatest.FlatSpec
import com.typesafe.scalalogging.StrictLogging

/**
 * This class handles some of the boilerplate of testing SparkApplications with HBase writers
 */
abstract class SparkApplicationTester extends FlatSpec with OneInstancePerTest with MockFactory with StrictLogging
  with SharedSparkContext {
  protected val hconn = mock[HConnection]
  lazy val spark = SparkSession
    .builder()
    .appName(getClass.getName)
    .master("local")
    .config("spark.sql.shuffle.partitions", "4")
    //setting this to false to emulate HiveQL's case insensitivity for column names
    .config("spark.sql.caseSensitive", "false")
    .getOrCreate()

  /**
   * Verify that the next HTable will receive the correct puts. Call this once per HTable that is supposed to be created and written to.
   * Note: All HBase tests for a SparkApplication object must be run sequentially in order for us to keep track of HTableInterface mocks
   * @param tests The test's expected name for the HTable and expected values for the Put objects placed in the HTable
   * @param acceptAnyPut Tells the mock to accept any put value. This is useful for tests that use the mock but are not
   *                     testing what is put inside it.
   */
  def setHBaseMock(tests: HTableParams, acceptAnyPut: Boolean = false): Unit = {
    val table = mock[HTableInterface]
    inSequence {
      (hconn.getTable(_: String)).expects(tests.tableName).returning(table)
      inAnyOrder {
        for (correctPut <- tests.puts) {
          if (acceptAnyPut) {
            (table.put(_: Put)).expects(*)
          }
          else {
            (table.put(_: Put)).expects(where {
              (actualPut: Put) =>
                val actualValue = CellUtil.cloneValue(actualPut.get(correctPut.columnFamily, correctPut.columnQualifier).get(0))
                correctPut.valueTest(actualValue)
                // just return true, as if issues, will have exception thrown by value test
                true
            })
          }
        }
      }
      (table.close _).expects().returns()
    }
  }

  /**
    * Make the next HTable accept any actions. This is useful when testing something that needs an HBase
    * table, but the specific test isn't exercising the HBase functionality.
    *
    * @param tableName the name of the table that will be accessed.
    */
  def allowAnyHBaseActions(tableName: String): Unit ={
    setHBaseMock(new HTableParams(tableName, Seq(null)), true)
  }

  /**
   * The set of parameters defining what values should be used to create the HTable
   * @param tableName The name of the table the test expects to be created
   * @param puts The list of parameters for the puts that the test expects to be placed in the table
   */
  case class HTableParams(
                           tableName: String,
                           puts: Seq[HPutParams]
                           )

  /**
   * The list of values that the test expects to be in a put.
   * @param row The name of the row to put into HBase
   * @param columnFamily The cell's column family
   * @param columnQualifier The cell's column qualifier
   * @param correctString A string representing the correct value or an error message
   * @param valueTest The method for checking if the value in the cell is correct, since the actual and intended
   *                  values in a cell may be semantically equal even if their byte-array representations differ.
   *                  This should throw an exception on failure, using a call like shouldBe
   */
  case class HPutParams(
                         row: Array[Byte],
                         columnFamily: Array[Byte],
                         columnQualifier: Array[Byte],
                         correctString: String,
                         valueTest: Array[Byte] => Unit
                         )
} 
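A hypothetical test built on these helpers might look like the following; the table name, cell coordinates, and expected value are illustrative, and ScalaTest's Matchers is imported for shouldBe:

import org.apache.hadoop.hbase.util.Bytes
import org.scalatest.Matchers._

class MyAppTest extends SparkApplicationTester {
  "MyApp" should "write one cell to HBase" in {
    // Expect exactly one put to the table, and check the decoded value.
    setHBaseMock(HTableParams("outputTable", Seq(
      HPutParams(
        row = Bytes.toBytes("row1"),
        columnFamily = Bytes.toBytes("cf"),
        columnQualifier = Bytes.toBytes("count"),
        correctString = "expected count: 42",
        valueTest = actual => Bytes.toInt(actual) shouldBe 42
      )
    )))
    // ... run the application under test against `hconn` here ...
  }
}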
Example 21
Source File: HBaseUtil.scala    From sprue   with Apache License 2.0
package com.cloudera.sprue

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes

object HBaseUtil {
  val columnFamily: String = "cf1"

  def insertIncomingDataIntoHBase(patient: Patient): Put = {
    if (patient.getPatientId == null) {
      return null
    } else {
      val put = new Put(Bytes.toBytes(patient.getPatientId))
      if (patient.getPatientId != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("patientId"), Bytes.toBytes(patient.getPatientId))
      if (patient.getLocation != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("location"), Bytes.toBytes(patient.getLocation))
      if ((patient.getEvaluationDate : java.lang.Long) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evaluationDate"), Bytes.toBytes(patient.getEvaluationDate))
      if ((patient.getTemperature  : java.lang.Float) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("temperature"), Bytes.toBytes(patient.getTemperature))
      if ((patient.getWbc  : java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("wbc"), Bytes.toBytes(patient.getWbc))

      if ((patient.getHeartRate  : java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("heartRate"), Bytes.toBytes(patient.getHeartRate))
      if ((patient.getRespiratoryRate  : java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("respiratoryRate"), Bytes.toBytes(patient.getRespiratoryRate))
      if ((patient.getSbp  : java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sbp"), Bytes.toBytes(patient.getSbp))
      if ((patient.getHypotension  : java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("hypotension"), Bytes.toBytes(patient.getHypotension))
      if (patient.getInfectionFlag != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("infectionFlag"), Bytes.toBytes(patient.getInfectionFlag))
      if ((patient.getOrganFailCount  : java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("organFailCount"), Bytes.toBytes(patient.getOrganFailCount))

      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evalFinished"), Bytes.toBytes("N"))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("recordUpdatedTime"), Bytes.toBytes(System.currentTimeMillis))

    }
  }

  def insertEvaluatedDataIntoHBase(patient: Patient): Put = {
    if (patient.getPatientId == null) {
      return null
    } else {
      val put = new Put(Bytes.toBytes(patient.getPatientId))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evalFinished"), Bytes.toBytes("Y"))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sirsCounter"), Bytes.toBytes(patient.getSirsCounter))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sirsFlag"), Bytes.toBytes(patient.getSirsFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sepsisFlag"), Bytes.toBytes(patient.getSepsisFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("severeSepsisFlag"), Bytes.toBytes(patient.getSevereSepsisFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("septicShockFlag"), Bytes.toBytes(patient.getSepticShockFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("organDysfunctionFlag"), Bytes.toBytes(patient.getOrganDysfunctionSyndrome))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("systemEvalTime"), Bytes.toBytes(System.currentTimeMillis))
      put
    }
  }
} 
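A detail both methods lean on: Bytes.toBytes is overloaded per type, and whoever reads the cell back must call the matching decoder, since HBase stores only raw bytes. A quick round-trip sketch:

import org.apache.hadoop.hbase.util.Bytes

object BytesRoundTrip {
  def main(args: Array[String]): Unit = {
    // Each overload has a matching decoder; mixing them corrupts the value.
    val asLong  = Bytes.toBytes(42L)   // 8 bytes
    val asFloat = Bytes.toBytes(98.6f) // 4 bytes
    val asStr   = Bytes.toBytes("N")   // UTF-8 bytes
    println(Bytes.toLong(asLong))      // 42
    println(Bytes.toFloat(asFloat))    // 98.6
    println(Bytes.toString(asStr))     // N
  }
}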
Example 22
Source File: L6-14HBase.scala    From prosparkstreaming   with Apache License 2.0
package org.apress.prospark

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.io.Text
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.json4s.DefaultFormats
import org.json4s.jvalue2extractable
import org.json4s.jvalue2monadic
import org.json4s.native.JsonMethods.parse
import org.json4s.string2JsonInput

object HBaseSinkApp {

  def main(args: Array[String]) {
    if (args.length != 5) {
      System.err.println(
        "Usage: HBaseSinkApp <appname> <hbaseMaster> <tableName> <columnFamilyName> <columnName>")
      System.exit(1)
    }

    val Seq(appName, hbaseMaster, tableName, columnFamilyName, columnName) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val batchInterval = 10
    val windowSize = 20
    val slideInterval = 10

    val ssc = new StreamingContext(conf, Seconds(batchInterval))

    HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env",
      interval = batchInterval)
      .flatMap(rec => {
        implicit val formats = DefaultFormats
        val query = parse(rec) \ "query"
        ((query \ "results" \ "quote").children)
          .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat))
      })
      .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval))
      .foreachRDD(rdd => {
        val hbaseConf = HBaseConfiguration.create()
        hbaseConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
        hbaseConf.set("hbase.master", hbaseMaster)
        val jobConf = new Configuration(hbaseConf)
        jobConf.set("mapreduce.job.outputformat.class", classOf[TableOutputFormat[Text]].getName)
        rdd.map(rec => {
          val put = new Put(rec._1.getBytes)
          put.addColumn(columnFamilyName.getBytes, columnName.getBytes, Bytes.toBytes(rec._2 / (windowSize / batchInterval)))
          (rec._1, put)
        }).saveAsNewAPIHadoopDataset(jobConf)
      })

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 23
Source File: L6-16SparkHBase.scala    From prosparkstreaming   with Apache License 2.0
package org.apress.prospark

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.json4s.DefaultFormats
import org.json4s.jvalue2extractable
import org.json4s.jvalue2monadic
import org.json4s.native.JsonMethods.parse
import org.json4s.string2JsonInput

object SparkHBaseBulkPutApp {

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: SparkHBaseBulkPutApp <appname> <tableName> <columnFamilyName> <columnName>")
      System.exit(1)
    }

    val Seq(appName, tableName, columnFamilyName, columnName) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val batchInterval = 10
    val windowSize = 20
    val slideInterval = 10

    val ssc = new StreamingContext(conf, Seconds(batchInterval))

    val hbaseConf = HBaseConfiguration.create()
    val hContext = new HBaseContext(ssc.sparkContext, hbaseConf)

    val windowed = HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env",
      interval = batchInterval)
      .flatMap(rec => {
        implicit val formats = DefaultFormats
        val query = parse(rec) \ "query"
        ((query \ "results" \ "quote").children)
          .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat))
      })
      .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval))

    hContext.streamBulkPut[(String, Float)](windowed, TableName.valueOf(tableName), rec => {
      val put = new Put(rec._1.getBytes)
      put.addColumn(columnFamilyName.getBytes, columnName.getBytes, Bytes.toBytes(rec._2 / (windowSize / batchInterval)))
      put
    })

    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 24
Source File: HBaseStreamingBulkPutExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.SparkConf


object HBaseStreamingBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 4) {
      println("HBaseStreamingBulkPutExample " +
        "{host} {port} {tableName} {columnFamily}")
      return
    }

    val host = args(0)
    val port = args(1)
    val tableName = args(2)
    val columnFamily = args(3)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)
    try {
      val ssc = new StreamingContext(sc, Seconds(1))

      val lines = ssc.socketTextStream(host, port.toInt)

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.streamBulkPut[String](lines,
        TableName.valueOf(tableName),
        (putRecord) => {
          if (putRecord.length() > 0) {
            val put = new Put(Bytes.toBytes(putRecord))
            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
            put
          } else {
            null
          }
        })
      ssc.start()
      ssc.awaitTerminationOrTimeout(60000)
    } finally {
      sc.stop()
    }
  }
}