org.apache.hadoop.hbase.client.Put Scala Examples
The following examples show how to use org.apache.hadoop.hbase.client.Put.
You can go to the original project or source file by following the link above each example.
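Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: build a Put keyed by a row key, add one or more cells with addColumn, and hand it to a Table. The table, family, qualifier, and value names below are placeholders chosen for illustration; they are not taken from any of the projects listed here.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes

object MinimalPutSketch {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val connection = ConnectionFactory.createConnection(conf)
    try {
      // "demo_table", "cf" and "col" are hypothetical names used only for this sketch
      val table = connection.getTable(TableName.valueOf("demo_table"))
      val put = new Put(Bytes.toBytes("row-1")) // the row key
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col"), Bytes.toBytes("value"))
      table.put(put)
      table.close()
    } finally {
      connection.close()
    }
  }
}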
Example 1
Source File: HogHBaseReputation.scala From hogzilla with GNU General Public License v2.0 | 5 votes |
package org.hogzilla.hbase

import scala.math.random
import java.lang.Math
import org.apache.spark._
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionModel, LinearRegressionWithSGD}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.rdd.RDD
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.BinaryComparator
import org.apache.hadoop.hbase.filter.FilterList
import org.apache.hadoop.hbase.filter.CompareFilter
import java.util.ArrayList
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.Filter
import scala.collection.mutable.HashSet
import org.apache.hadoop.hbase.client.Put

object HogHBaseReputation {

  // Ex: MX, whitelist
  def getReputationList(listName: String, listType: String): Set[String] = {
    val list = new HashSet[String]

    val filters: ArrayList[Filter] = new ArrayList()

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false)

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false)

    filters.add(colValFilter1)
    filters.add(colValFilter2)

    val filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters)

    val scan = new Scan()
    scan.setFilter(filterList)

    val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator()
    while (it.hasNext()) {
      list.add(Bytes.toString(it.next().getValue(Bytes.toBytes("rep"), Bytes.toBytes("ip"))))
    }

    list.toSet
  }

  def saveReputationList(listName: String, listType: String, ip: String) = {
    val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }
}
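A note on this listing: hogzilla targets the older 0.9x-era client, where Put.add(family, qualifier, value) adds a cell. That method was deprecated in HBase 1.0 and dropped in later releases in favour of addColumn, so on a current client saveReputationList would look roughly like the sketch below (HogHBaseRDD.hogzilla_reputation is assumed, as in the original, to be an open table handle):

def saveReputationList(listName: String, listType: String, ip: String): Unit = {
  val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
  // addColumn is the current replacement for the deprecated Put.add(family, qualifier, value)
  put.addColumn(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
  put.addColumn(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
  put.addColumn(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))
  HogHBaseRDD.hogzilla_reputation.put(put)
}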
Example 2
package org.apache.spark.sql

import java.io.File

import com.google.common.io.Files
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseTestingUtility}
import org.apache.hadoop.hbase.client.{Scan, Put, ConnectionFactory, Table}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.sql.types.UTF8String
import org.apache.spark.{SparkContext, SparkConf, Logging}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

import scala.collection.JavaConverters._

class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging {

  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args: _*))
    }
  }

  private[spark] var htu = HBaseTestingUtility.createLocalHTU()
  private[spark] def tableName = "table1"
  private[spark] def columnFamilies: Array[String] = Array.tabulate(9) { x => s"cf$x" }
  var table: Table = null

  val conf = new SparkConf
  conf.set(SparkHBaseConf.testConf, "true")
  SparkHBaseConf.conf = htu.getConfiguration
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  def catalog = s"""{
                   |"table":{"namespace":"default", "name":"table1"},
                   |"rowkey":"key",
                   |"columns":{
                   |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
                   |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
                   |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
                   |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
                   |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
                   |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
                   |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
                   |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
                   |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
                   |}
                   |}""".stripMargin

  override def beforeAll() {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.cleanupTestDir
    htu.startMiniZKCluster
    htu.startMiniHBaseCluster(1, 4)
    logInfo(" - minicluster started")
    println(" - minicluster started")
  }

  override def afterAll() {
    try {
      table.close()
      println("shutdown")
      htu.deleteTable(TableName.valueOf(tableName))
      logInfo("shuting down minicluster")
      htu.shutdownMiniHBaseCluster
      htu.shutdownMiniZKCluster
      logInfo(" - minicluster shut down")
      htu.cleanupTestDir
    } catch {
      case _ => logError("teardown error")
    }
  }

  def createTable(name: String, cfs: Array[String]) {
    val tName = Bytes.toBytes(name)
    val bcfs = cfs.map(Bytes.toBytes(_))
    try {
      htu.deleteTable(TableName.valueOf(tName))
    } catch {
      case _ => logInfo(" - no table " + name + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(tName), bcfs)
  }

  def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) {
    try {
      htu.deleteTable(TableName.valueOf(name))
    } catch {
      case _ => logInfo(" - no table " + Bytes.toString(name) + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(name), cfs)
  }
}
Example 3
Source File: HBaseForeachPartitionExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseForeachPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseForeachPartitionExample {tableName} {columnFamily} are missing an arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseForeachPartitionExample " + tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseForeachPartition(hbaseContext,
        (it, connection) => {
          val m = connection.getBufferedMutator(TableName.valueOf(tableName))

          it.foreach(r => {
            val put = new Put(r._1)
            r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
            m.mutate(put)
          })
          m.flush()
          m.close()
        })
    } finally {
      sc.stop()
    }
  }
}
Example 4
Source File: HBaseBulkPutExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseBulkPut(hbaseContext,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 5
Source File: HBaseStreamingBulkPutExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseStreamingBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 4) {
      println("HBaseStreamingBulkPutExample " +
        "{host} {port} {tableName} {columnFamily} are missing an argument")
      return
    }

    val host = args(0)
    val port = args(1)
    val tableName = args(2)
    val columnFamily = args(3)

    val sparkConf = new SparkConf().setAppName("HBaseStreamingBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val ssc = new StreamingContext(sc, Seconds(1))

      val lines = ssc.socketTextStream(host, port.toInt)

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.streamBulkPut[String](lines,
        TableName.valueOf(tableName),
        (putRecord) => {
          if (putRecord.length() > 0) {
            val put = new Put(Bytes.toBytes(putRecord))
            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
            put
          } else {
            null
          }
        })
      ssc.start()
      ssc.awaitTerminationOrTimeout(60000)
    } finally {
      sc.stop()
    }
  }
}
Example 6
Source File: HBaseBulkPutExampleFromFile.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutExampleFromFile {
  def main(args: Array[String]) {
    if (args.length < 3) {
      println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile} are missing an argument")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)
    val inputFile = args(2)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " +
      tableName + " " + columnFamily + " " + inputFile)
    val sc = new SparkContext(sparkConf)

    try {
      var rdd = sc.hadoopFile(
        inputFile,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]).map(v => {
          System.out.println("reading-" + v._2.toString)
          v._2.toString
        })

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.bulkPut[String](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          System.out.println("hbase-" + putRecord)
          val put = new Put(Bytes.toBytes("Value- " + putRecord))
          put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), Bytes.toBytes(putRecord.length()))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 7
Source File: HBaseBulkPutExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 8
Source File: HBaseBulkPutTimestampExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutTimestampExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily} are missing an argument")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("6"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("7"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("8"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("9"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("10"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))))

      val conf = HBaseConfiguration.create()

      val timeStamp = System.currentTimeMillis()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, timeStamp, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 9
Source File: HBasePut.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.hbase.utilities

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.{DataFrame, SparkSession}

import com.paypal.gimel.hbase.conf.{HbaseClientConfiguration, HbaseConfigs}
import com.paypal.gimel.logger.Logger

object HBasePut {
  def apply(sparkSession: SparkSession): HBasePut = new HBasePut(sparkSession)
}

class HBasePut(sparkSession: SparkSession) {
  val logger = Logger()
  lazy val hbaseUtilities = HBaseUtilities(sparkSession)

  def putRows(hbaseTable: String, dataFrame: DataFrame, rowKeyColumn: String, columns: Array[String], cfColsMap: Map[String, String]) {
    try {
      // Configure And Connect
      val conf = HBaseConfiguration.create()
      val cnxn = ConnectionFactory.createConnection(conf)
      // Create Connection to HBase table
      val tbl = cnxn.getTable(TableName.valueOf(hbaseTable))
      val rows = dataFrame.rdd.map { row =>
        (row.getAs(rowKeyColumn).toString,
          columns.map(eachCol => (cfColsMap.getOrElse(eachCol, ""), eachCol, row.getAs(eachCol).asInstanceOf[String]))
        )
      }.collect()
      // Performing put operation on each row of dataframe
      rows.foreach { row =>
        val putRow: Put = new Put(Bytes.toBytes(row._1.asInstanceOf[String]))
        row._2.foreach(x => if (x._2 != rowKeyColumn) putRow.addColumn(Bytes.toBytes(x._1), Bytes.toBytes(x._2), Bytes.toBytes(x._3)))
        tbl.put(putRow)
      }
      tbl.close()
    } catch {
      case ex: Throwable =>
        ex.printStackTrace()
        throw ex
    }
  }
}
Example 10
Source File: Hdfs2HBase.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.hbase

import java.util

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.spark.{SparkConf, SparkContext}

object Hdfs2HBase {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("spark://spark01:7077")
      .setAppName(Hdfs2HBase.getClass.getName)
      .set("spark.jars", "target/wow-spark-1.0-SNAPSHOT.jar")
    val sparkContext = new SparkContext(conf)

    val userRDD = sparkContext.textFile("hdfs://spark01:9000/spark/users.dat", 2).map(_.split("::"))

    userRDD.foreachPartition(iter => {
      val configuration = HBaseConfiguration.create()
      // configuration.set("hbase.zookeeper.quorum","spark01:2181,spark02:2181,spark03:2181")
      configuration.set("hbase.zookeeper.quorum", "spark01")
      configuration.set("hbase.zookeeper.property.clientPort", "2181")
      // create the connection
      val connection = ConnectionFactory.createConnection(configuration)
      // get table object
      val person = connection.getTable(TableName.valueOf("users"))
      iter.foreach(p => {
        val arrayList = new util.ArrayList[Put]()
        val put = new Put(p(0).getBytes)
        arrayList.add(put.addColumn("f1".getBytes, "gender".getBytes, p(1).getBytes))
        arrayList.add(put.addColumn("f1".getBytes, "age".getBytes, p(2).getBytes))
        arrayList.add(put.addColumn("f2".getBytes, "position".getBytes, p(3).getBytes))
        arrayList.add(put.addColumn("f2".getBytes, "code".getBytes, p(4).getBytes))
        person.put(arrayList)
      })
    })
    sparkContext.stop()
  }
}
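One detail worth flagging in this listing: Put.addColumn returns the Put itself, so the four arrayList.add(...) calls all append the same Put instance, and person.put(arrayList) ends up sending that one mutation four times. A leaner equivalent of the inner loop (a sketch only, keeping the original families, qualifiers, and table handle) accumulates the columns on a single Put per input line:

iter.foreach { p =>
  val put = new Put(p(0).getBytes)
  put.addColumn("f1".getBytes, "gender".getBytes, p(1).getBytes)
  put.addColumn("f1".getBytes, "age".getBytes, p(2).getBytes)
  put.addColumn("f2".getBytes, "position".getBytes, p(3).getBytes)
  put.addColumn("f2".getBytes, "code".getBytes, p(4).getBytes)
  person.put(put) // one Put per record instead of the same Put queued four times
}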
Example 11
Source File: HogEvent.scala From hogzilla with GNU General Public License v2.0 | 5 votes |
package org.hogzilla.event

import java.util.HashMap
import java.util.Map
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.hogzilla.hbase.HogHBaseRDD
import org.hogzilla.util.HogFlow
import java.net.InetAddress

class HogEvent(flow: HogFlow) {
  var sensorid: Int = 0
  var signature_id: Double = 0
  var priorityid: Int = 0
  var text: String = ""
  var data: Map[String, String] = new HashMap()
  var ports: String = ""
  var title: String = ""
  var username: String = ""
  var coords: String = ""

  def formatIPtoBytes(ip: String): Array[Byte] = {
    try {
      // Eca! Snorby doesn't support IPv6 yet. See https://github.com/Snorby/snorby/issues/65
      if (ip.contains(":"))
        InetAddress.getByName("255.255.6.6").getAddress
      else
        InetAddress.getByName(ip).getAddress
    } catch {
      case t: Throwable =>
        // Bogus address!
        InetAddress.getByName("255.255.1.1").getAddress
    }
  }

  def alert() {
    val put = new Put(Bytes.toBytes(flow.get("flow:id")))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("note"), Bytes.toBytes(text))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip"), formatIPtoBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip"), formatIPtoBytes(flow.upper_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip_str"), Bytes.toBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip_str"), Bytes.toBytes(flow.upper_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("signature_id"), Bytes.toBytes("%.0f".format(signature_id)))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("time"), Bytes.toBytes(System.currentTimeMillis))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("ports"), Bytes.toBytes(ports))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("title"), Bytes.toBytes(title))

    if (!username.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("username"), Bytes.toBytes(username))
    if (!coords.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("coords"), Bytes.toBytes(coords))

    HogHBaseRDD.hogzilla_events.put(put)
    //println(f"ALERT: $text%100s\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
  }
}
Example 12
Source File: HogSignature.scala From hogzilla with GNU General Public License v2.0 | 5 votes |
package org.hogzilla.event

import org.hogzilla.hbase.HogHBaseRDD
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Put

case class HogSignature(signature_class: Int, signature_name: String, signature_priority: Int,
                        signature_revision: Int, signature_id: Double, signature_group_id: Int) {

  // Example: 3,"HZ: Suspicious DNS flow identified by K-Means clustering",2,1,826000001,826

  def saveHBase(): HogSignature = {
    val get = new Get(Bytes.toBytes("%.0f".format(signature_id)))

    if (!HogHBaseRDD.hogzilla_sensor.exists(get)) {
      val put = new Put(Bytes.toBytes("%.0f".format(signature_id)))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("id"), Bytes.toBytes("%.0f".format(signature_id)))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("class"), Bytes.toBytes(signature_class.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("name"), Bytes.toBytes(signature_name))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("priority"), Bytes.toBytes(signature_priority.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("revision"), Bytes.toBytes(signature_revision.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("group_id"), Bytes.toBytes(signature_group_id.toString()))
      HogHBaseRDD.hogzilla_signatures.put(put)
    }

    this
  }
}
Example 13
Source File: HbRddWriter.scala From hbrdd with Apache License 2.0 | 5 votes |
package top.spoofer.hbrdd.hbsupport

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.spark.rdd.RDD
import top.spoofer.hbrdd.config.HbRddConfig
import top.spoofer.hbrdd.unit.HbRddFormatsWriter
import top.spoofer.hbrdd._
import HbRddWritPuter._

trait HbRddWriter {
  type TsValue[A] = (Long, A) // (ts, A)
  val LATEST_TIMESTAMP = Long.MaxValue

  final class SingleFamilyRDDWriter[A](
    val rdd: RDD[(String, Map[String, A])],
    val put: HbRddPuter[A]
  ) extends HbRddWritCommon[A] with Serializable {

    def put2Hbase(tableName: String, family: String)(implicit config: HbRddConfig) = {
      val job = createJob(tableName, config.getHbaseConfig)

      rdd.flatMap({ case (rowId, data) => convert2Writable(rowId, Map(family -> data), put) })
        .saveAsNewAPIHadoopDataset(job.getConfiguration)
    }
  }
Example 14
Source File: HogHBaseCluster.scala From hogzilla with GNU General Public License v2.0 | 5 votes |
package org.hogzilla.hbase

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Delete
import org.hogzilla.cluster.HogClusterMember

object HogHBaseCluster {

  def formatClusterTitle(clusterCentroid: List[(Long, Double)], clusterIdx: Int): String = {
    val mainTitle =
      "Group " + clusterIdx.toString + " - " +
        clusterCentroid
          .filter({ case (port, rate) => rate > 4.999 })
          .map({ case (port, rate) => port.toString() + ":" + "%.0f".format(rate) + "%" })
          .mkString(", ")

    val onePercentList =
      clusterCentroid
        .filter({ case (port, rate) => .9999 < rate & rate < 5 })

    if (onePercentList.size > 0) {
      mainTitle + ", " +
        onePercentList.map({ case (port, rate) => port.toString() }).mkString("(", ", ", ")" + "> 1%")
    } else {
      mainTitle
    }
  }

  def deleteCluster(clusterIdx: Int) = {
    val del = new Delete(Bytes.toBytes(clusterIdx.toString))
    HogHBaseRDD.hogzilla_clusters.delete(del)
  }

  def deleteClusterMember(memberIP: String) = {
    val del = new Delete(Bytes.toBytes(memberIP))
    HogHBaseRDD.hogzilla_cluster_members.delete(del)
  }

  def saveCluster(clusterIdx: Int, clusterCentroid: List[(Long, Double)], clusterSize: Long, members: Array[String]) = {
    val memberString = members.mkString(",")

    val put = new Put(Bytes.toBytes(clusterIdx.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("title"), Bytes.toBytes(formatClusterTitle(clusterCentroid, clusterIdx)))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("size"), Bytes.toBytes(clusterSize.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("centroid"), Bytes.toBytes(clusterCentroid.mkString("[", ",", "]")))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("members"), Bytes.toBytes(memberString))

    HogHBaseRDD.hogzilla_clusters.put(put)
  }

  def saveClusterMember(clusterMember: HogClusterMember) = {
    val put = new Put(Bytes.toBytes(clusterMember.memberIP.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("title"), Bytes.toBytes(clusterMember.formatTitle))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("size"), Bytes.toBytes(clusterMember.clusterSize.toString))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("centroid"), Bytes.toBytes(clusterMember.centroid.mkString("[", ",", "]")))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("idx"), Bytes.toBytes(clusterMember.clusterIdx.toString))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("description"), Bytes.toBytes(formatClusterTitle(clusterMember.centroid, clusterMember.clusterIdx)))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("ports"), Bytes.toBytes("TCP: " + clusterMember.ports.mkString("", " ", "")))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("frequencies"),
      Bytes.toBytes("TCP: " +
        clusterMember.frequency_vector
          .filter({ case (port, freq) => clusterMember.ports.contains(port) })
          .map({ case (port, freq) => port.toString + "=" + "%.0f".format(freq) + "%" })
          .mkString("", " ", "")))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("ip"), Bytes.toBytes(clusterMember.memberIP))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("distance"), Bytes.toBytes("%.2f".format(clusterMember.distance)))

    HogHBaseRDD.hogzilla_cluster_members.put(put)
  }
}
Example 15
Source File: HBaseForeachPartitionExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkContext, SparkConf}

object HBaseForeachPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseForeachPartition(hbaseContext,
        (it, connection) => {
          val m = connection.getBufferedMutator(TableName.valueOf(tableName))

          it.foreach(r => {
            val put = new Put(r._1)
            r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
            m.mutate(put)
          })
          m.flush()
          m.close()
        })
    } finally {
      sc.stop()
    }
  }
}
Example 16
Source File: HBaseBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.spark.{SparkConf, SparkContext}

object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseBulkPut(hbaseContext,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 17
Source File: HBaseBulkPutExampleFromFile.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.spark.SparkConf

object HBaseBulkPutExampleFromFile {
  def main(args: Array[String]) {
    if (args.length < 3) {
      println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)
    val inputFile = args(2)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " +
      tableName + " " + columnFamily + " " + inputFile)
    val sc = new SparkContext(sparkConf)

    try {
      var rdd = sc.hadoopFile(
        inputFile,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]).map(v => {
          System.out.println("reading-" + v._2.toString)
          v._2.toString
        })

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.bulkPut[String](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          System.out.println("hbase-" + putRecord)
          val put = new Put(Bytes.toBytes("Value- " + putRecord))
          put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), Bytes.toBytes(putRecord.length()))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 18
Source File: HBaseBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf

object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 19
Source File: HBaseBulkPutTimestampExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf

object HBaseBulkPutTimestampExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("6"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("7"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("8"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("9"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("10"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))))

      val conf = HBaseConfiguration.create()

      val timeStamp = System.currentTimeMillis()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, timeStamp, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 20
Source File: SparkApplicationTester.scala From TopNotch with Apache License 2.0 | 5 votes |
package com.bfm.topnotch

import org.scalatest.OneInstancePerTest
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.{HConnection, HTableInterface, Put}
import org.apache.spark._
import org.apache.spark.sql.SparkSession
import org.scalamock.scalatest.MockFactory
import org.scalatest.FlatSpec
import com.typesafe.scalalogging.StrictLogging

/**
 * This class handles some of the boilerplate of testing SparkApplications with HBase writers
 */
abstract class SparkApplicationTester extends FlatSpec with OneInstancePerTest with MockFactory with StrictLogging
  with SharedSparkContext {

  protected val hconn = mock[HConnection]

  lazy val spark = SparkSession
    .builder()
    .appName(getClass.getName)
    .master("local")
    .config("spark.sql.shuffle.partitions", "4")
    //setting this to false to emulate HiveQL's case insensitivity for column names
    .config("spark.sql.caseSensitive", "false")
    .getOrCreate()

  /**
   * Verify that the next HTable will receive the correct puts. Call this once per HTable that is supposed to be created and written to.
   * Note: All HBase tests for a SparkApplication object must be run sequentially in order for us to keep track of HTableInterface mocks
   * @param tests The test's expected name for the HTable and expected values for the Put objects placed in the HTable
   * @param acceptAnyPut Tells the mock to accept any put value. This is useful for tests using the mock and but not
   *                     testing what is put inside it.
   */
  def setHBaseMock(tests: HTableParams, acceptAnyPut: Boolean = false): Unit = {
    val table = mock[HTableInterface]
    inSequence {
      (hconn.getTable(_: String)).expects(tests.tableName).returning(table)
      inAnyOrder {
        for (correctPut <- tests.puts) {
          if (acceptAnyPut) {
            (table.put(_: Put)).expects(*)
          } else {
            (table.put(_: Put)).expects(where { (actualPut: Put) =>
              val actualValue = CellUtil.cloneValue(actualPut.get(correctPut.columnFamily, correctPut.columnQualifier).get(0))
              correctPut.valueTest(actualValue)
              // just return true, as if issues, will have exception thrown by value test
              true
            })
          }
        }
      }
      (table.close _).expects().returns()
    }
  }

  /**
   * Set the next HTable will accept anything accept anything. This is useful if testing a thing that needs an hbase
   * table, but the specific test isn't testing the hbase functionality.
   *
   * @param tableName the name of the table that will be accessed.
   */
  def allowAnyHBaseActions(tableName: String): Unit = {
    setHBaseMock(new HTableParams(tableName, Seq(null)), true)
  }

  /**
   * The set of parameters defining what values should be used to create the HTable
   * @param tableName The name of the table the test expects to be created
   * @param puts The list of parameters for the puts that the test expects to be placed in the table
   */
  case class HTableParams(
    tableName: String,
    puts: Seq[HPutParams]
  )

  /**
   * The list of values that the test expects to be in a put.
   * @param row The name of the row to put into HBase
   * @param columnFamily The cell's column family
   * @param columnQualifier The cell's column qualifier
   * @param correctString A string representing the correct value or an error message
   * @param valueTest The method for checking if the value in the cell is correct. Done as the actual and intended values
   *                  in a cell may be equal even if they don't have the expression as an array of bytes.
   *                  This should throw an exception on failure, using a call like shouldBe
   */
  case class HPutParams(
    row: Array[Byte],
    columnFamily: Array[Byte],
    columnQualifier: Array[Byte],
    correctString: String,
    valueTest: Array[Byte] => Unit
  )
}
Example 21
Source File: HBaseUtil.scala From sprue with Apache License 2.0 | 5 votes |
package com.cloudera.sprue

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes

object HBaseUtil {

  val columnFamily: String = "cf1"

  def insertIncomingDataIntoHBase(patient: Patient): Put = {
    if (patient.getPatientId == null) {
      return null
    } else {
      val put = new Put(Bytes.toBytes(patient.getPatientId))

      if (patient.getPatientId != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("patientId"), Bytes.toBytes(patient.getPatientId))
      if (patient.getLocation != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("location"), Bytes.toBytes(patient.getLocation))
      if ((patient.getEvaluationDate: java.lang.Long) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evaluationDate"), Bytes.toBytes(patient.getEvaluationDate))
      if ((patient.getTemperature: java.lang.Float) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("temperature"), Bytes.toBytes(patient.getTemperature))
      if ((patient.getWbc: java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("wbc"), Bytes.toBytes(patient.getWbc))
      if ((patient.getHeartRate: java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("heartRate"), Bytes.toBytes(patient.getHeartRate))
      if ((patient.getRespiratoryRate: java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("respiratoryRate"), Bytes.toBytes(patient.getRespiratoryRate))
      if ((patient.getSbp: java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sbp"), Bytes.toBytes(patient.getSbp))
      if ((patient.getHypotension: java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("hypotension"), Bytes.toBytes(patient.getHypotension))
      if (patient.getInfectionFlag != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("infectionFlag"), Bytes.toBytes(patient.getInfectionFlag))
      if ((patient.getOrganFailCount: java.lang.Integer) != null)
        put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("organFailCount"), Bytes.toBytes(patient.getOrganFailCount))

      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evalFinished"), Bytes.toBytes("N"))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("recordUpdatedTime"), Bytes.toBytes(System.currentTimeMillis))
    }
  }

  def insertEvaluatedDataIntoHBase(patient: Patient): Put = {
    if (patient.getPatientId == null) {
      return null
    } else {
      val put = new Put(Bytes.toBytes(patient.getPatientId))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evalFinished"), Bytes.toBytes("Y"))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sirsCounter"), Bytes.toBytes(patient.getSirsCounter))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sirsFlag"), Bytes.toBytes(patient.getSirsFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sepsisFlag"), Bytes.toBytes(patient.getSepsisFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("severeSepsisFlag"), Bytes.toBytes(patient.getSevereSepsisFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("septicShockFlag"), Bytes.toBytes(patient.getSepticShockFlag))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("organDysfunctionFlag"), Bytes.toBytes(patient.getOrganDysfunctionSyndrome))
      put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("systemEvalTime"), Bytes.toBytes(System.currentTimeMillis))
      put
    }
  }
}
Example 22
Source File: L6-14HBase.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.io.Text
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.json4s.DefaultFormats
import org.json4s.jvalue2extractable
import org.json4s.jvalue2monadic
import org.json4s.native.JsonMethods.parse
import org.json4s.string2JsonInput

object HBaseSinkApp {

  def main(args: Array[String]) {
    if (args.length != 5) {
      System.err.println(
        "Usage: HBaseSinkApp <appname> <hbaseMaster> <tableName> <columnFamilyName> <columnName>")
      System.exit(1)
    }

    val Seq(appName, hbaseMaster, tableName, columnFamilyName, columnName) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val batchInterval = 10
    val windowSize = 20
    val slideInterval = 10

    val ssc = new StreamingContext(conf, Seconds(batchInterval))

    HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env",
      interval = batchInterval)
      .flatMap(rec => {
        implicit val formats = DefaultFormats
        val query = parse(rec) \ "query"
        ((query \ "results" \ "quote").children)
          .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat))
      })
      .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval))
      .foreachRDD(rdd => {
        val hbaseConf = HBaseConfiguration.create()
        hbaseConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
        hbaseConf.set("hbase.master", hbaseMaster)
        val jobConf = new Configuration(hbaseConf)
        jobConf.set("mapreduce.job.outputformat.class", classOf[TableOutputFormat[Text]].getName)
        rdd.map(rec => {
          val put = new Put(rec._1.getBytes)
          put.addColumn(columnFamilyName.getBytes, columnName.getBytes, Bytes.toBytes(rec._2 / (windowSize / batchInterval)))
          (rec._1, put)
        }).saveAsNewAPIHadoopDataset(jobConf)
      })

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 23
Source File: L6-16SparkHBase.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions
import org.json4s.DefaultFormats
import org.json4s.jvalue2extractable
import org.json4s.jvalue2monadic
import org.json4s.native.JsonMethods.parse
import org.json4s.string2JsonInput

object SparkHBaseBulkPutApp {

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: SparkHBaseBulkPutApp <appname> <tableName> <columnFamilyName> <columnName>")
      System.exit(1)
    }

    val Seq(appName, tableName, columnFamilyName, columnName) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val batchInterval = 10
    val windowSize = 20
    val slideInterval = 10

    val ssc = new StreamingContext(conf, Seconds(batchInterval))

    val hbaseConf = HBaseConfiguration.create()
    val hContext = new HBaseContext(ssc.sparkContext, hbaseConf)

    val windowed = HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env",
      interval = batchInterval)
      .flatMap(rec => {
        implicit val formats = DefaultFormats
        val query = parse(rec) \ "query"
        ((query \ "results" \ "quote").children)
          .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat))
      })
      .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval))

    hContext.streamBulkPut[(String, Float)](windowed, TableName.valueOf(tableName), rec => {
      val put = new Put(rec._1.getBytes)
      put.addColumn(columnFamilyName.getBytes, columnName.getBytes, Bytes.toBytes(rec._2 / (windowSize / batchInterval)))
      put
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
Example 24
Source File: HBaseStreamingBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 4 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.SparkConf

object HBaseStreamingBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 4) {
      println("HBaseStreamingBulkPutExample " +
        "{host} {port} {tableName} {columnFamily}")
      return
    }

    val host = args(0)
    val port = args(1)
    val tableName = args(2)
    val columnFamily = args(3)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val ssc = new StreamingContext(sc, Seconds(1))

      val lines = ssc.socketTextStream(host, port.toInt)

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.streamBulkPut[String](lines,
        TableName.valueOf(tableName),
        (putRecord) => {
          if (putRecord.length() > 0) {
            val put = new Put(Bytes.toBytes(putRecord))
            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
            put
          } else {
            null
          }
        })
      ssc.start()
      ssc.awaitTerminationOrTimeout(60000)
    } finally {
      sc.stop()
    }
  }
}