org.apache.commons.logging.Log Scala Examples
The following examples show how to use org.apache.commons.logging.Log.
The project, source file, and license for each example are noted in the heading above it.
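All of the examples below follow the same basic commons-logging pattern: obtain a Log from LogFactory (usually keyed by the enclosing class) and guard expensive messages with the is*Enabled checks. As a minimal, self-contained sketch of that pattern (the class and message here are illustrative, not taken from any of the examples):

import org.apache.commons.logging.{Log, LogFactory}

class PaymentProcessor {
  // One logger per class, keyed by the class itself.
  private val LOG: Log = LogFactory.getLog(classOf[PaymentProcessor])

  def process(amount: BigDecimal): Unit = {
    // Guard debug-level messages so the string is only built when debug is on.
    if (LOG.isDebugEnabled) {
      LOG.debug(s"processing payment of $amount")
    }
    LOG.info("payment processed")
  }
}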
Example 1
Source File: SparkSQLSessionManager.scala from sparkoscope with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.{HiveSessionState, HiveUtils}
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    // Create operation log root directory, if operation logging is enabled
    if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
      invoke(classOf[SessionManager], this, "initOperationLogRootDir")
    }

    val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS)
    setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize))
    getAncestorField[Log](this, 3, "LOG").info(
      s"HiveServer2: Async execution pool size $backgroundPoolSize")

    setSuperField(this, "operationManager", sparkSqlOperationManager)
    addService(sparkSqlOperationManager)
    initCompositeService(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val sessionState = sqlContext.sessionState.asInstanceOf[HiveSessionState]
    val ctx = if (sessionState.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf("spark.sql.hive.version", HiveUtils.hiveExecutionVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
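Many of the thrift-server examples above and below reach private fields of Hive's service superclasses through helpers such as setSuperField, getAncestorField, and invoke from Spark's ReflectionUtils. The real implementation lives in Spark's hive-thriftserver module; purely as an illustration of the idea (walking up the superclass chain and forcing field accessibility), a getAncestorField-style helper could be sketched like this. Everything about this sketch beyond the standard java.lang.reflect API is an assumption, not Spark's actual code:

// Hypothetical sketch, not Spark's actual ReflectionUtils: climbs `levels`
// superclasses from obj's class, then reads/writes the named (possibly private) field.
object ReflectionSketch {
  def getAncestorField[T](obj: AnyRef, levels: Int, fieldName: String): T = {
    // Walk up the inheritance chain the requested number of levels.
    val ancestor = (1 to levels).foldLeft[Class[_]](obj.getClass)((c, _) => c.getSuperclass)
    val field = ancestor.getDeclaredField(fieldName)
    field.setAccessible(true) // bypass private/protected visibility
    field.get(obj).asInstanceOf[T]
  }

  def setAncestorField(obj: AnyRef, levels: Int, fieldName: String, value: AnyRef): Unit = {
    val ancestor = (1 to levels).foldLeft[Class[_]](obj.getClass)((c, _) => c.getSuperclass)
    val field = ancestor.getDeclaredField(fieldName)
    field.setAccessible(true)
    field.set(obj, value)
  }
}

This is why the examples pass a level count such as 3 to getAncestorField[Log](this, 3, "LOG"): the LOG field is declared several classes up the Hive service hierarchy.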
Example 2
Source File: LocalMemoryDataBlock.scala from sona with Apache License 2.0
package com.tencent.angel.sona.data

import java.io.IOException
import java.util
import java.util.Collections

import com.tencent.angel.ml.math2.utils.{DataBlock, LabeledData}
import org.apache.commons.logging.{Log, LogFactory}
import org.ehcache.sizeof.SizeOf

// Note: readIndex and writeIndex are inherited from DataBlock.
// The original parameter name "maxUseMemroy" has been corrected to maxUseMemory here.
class LocalMemoryDataBlock(initSize: Int, maxUseMemory: Long) extends DataBlock[LabeledData] {
  private val LOG: Log = LogFactory.getLog(classOf[LocalMemoryDataBlock])
  private var estimateSampleNumber: Int = 100

  val initCapacity = if (initSize > 0) {
    estimateSampleNumber = initSize
    initSize
  } else {
    estimateSampleNumber
  }

  // Pre-size the backing list to the estimated capacity.
  private val vList = new util.ArrayList[LabeledData](initCapacity)
  private var isFull: Boolean = false

  @throws[IOException]
  override def read(): LabeledData = {
    if (readIndex < writeIndex) {
      val value = vList.get(readIndex)
      readIndex += 1
      value
    } else {
      null.asInstanceOf[LabeledData]
    }
  }

  @throws[IOException]
  override protected def hasNext: Boolean = readIndex < writeIndex

  @throws[IOException]
  override def get(index: Int): LabeledData = {
    if (index < 0 || index >= writeIndex) {
      throw new IOException("index not in range[0," + writeIndex + ")")
    }
    vList.get(index)
  }

  @throws[IOException]
  override def put(value: LabeledData): Unit = {
    if (writeIndex < estimateSampleNumber) {
      vList.add(value)
      writeIndex += 1
      if (writeIndex == estimateSampleNumber && !isFull) {
        estimateAndResizeVList()
      }
    } else {
      LOG.info("Over maxUseMemory, no value added!")
    }
  }

  override def resetReadIndex(): Unit = {
    readIndex = 0
  }

  override def clean(): Unit = {
    readIndex = 0
    writeIndex = 0
    vList.clear()
  }

  override def shuffle(): Unit = Collections.shuffle(vList)

  override def flush(): Unit = {}

  override def slice(startIndex: Int, length: Int): DataBlock[LabeledData] = ???

  private def estimateAndResizeVList(): Unit = {
    // Average per-item size, rounded up.
    val avgDataItemSize = (SizeOf.newInstance().deepSizeOf(vList) + vList.size - 1) / vList.size
    val maxStoreNum = (maxUseMemory / avgDataItemSize).toInt
    val capacity = if (maxStoreNum < 2 * vList.size) {
      isFull = true
      maxStoreNum
    } else {
      2 * vList.size
    }
    estimateSampleNumber = (0.8 * capacity).toInt
    vList.ensureCapacity(capacity)
    LOG.debug("estimate sample number=" + vList.size + ", avgDataItemSize=" + avgDataItemSize +
      ", maxStoreNum=" + maxStoreNum + ", maxUseMemory=" + maxUseMemory)
  }
}
Example 3
Source File: SparkSQLSessionManager.scala from bdg-sequila with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.{SQLContext, SequilaSession}
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManagerSeq

private[hive] class SparkSQLSessionManagerSeq(hiveServer: HiveServer2, ss: SequilaSession)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManagerSeq(ss)

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "operationManager", sparkSqlOperationManager)
    super.init(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2Seq.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val ctx = if (ss.sqlContext.conf.hiveThriftServerSingleSession) {
      ss.sqlContext
    } else {
      ss.sqlContext.newSession()
    }
    // ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2Seq.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 4
Source File: SparkSQLCLIService.scala from bdg-sequila with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import scala.collection.JavaConverters._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.shims.Utils
import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation}
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.{SQLContext, SequilaSession}
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

class SparkSQLCLIServiceSeq(hiveServer: HiveServer2, ss: SequilaSession)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManagerSeq(hiveServer, ss)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null
    var httpUGI: UserGroupInformation = null

    if (UserGroupInformation.isSecurityEnabled) {
      try {
        val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL)
        val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB)
        if (principal.isEmpty || keyTabFile.isEmpty) {
          throw new IOException(
            "HiveServer2 Kerberos principal or keytab is not correctly configured")
        }

        val originalUgi = UserGroupInformation.getCurrentUser
        // sparkServiceUGI = if (HiveAuthFactory.needUgiLogin(originalUgi,
        //   SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile)) {
        //   HiveAuthFactory.loginFromKeytab(hiveConf)
        //   Utils.getUGI()
        // } else {
        //   originalUgi
        // }
        sparkServiceUGI = originalUgi
        setSuperField(this, "serviceUGI", sparkServiceUGI)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }

      // Try creating spnego UGI if it is configured.
      val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL).trim
      val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB).trim
      if (principal.nonEmpty && keyTabFile.nonEmpty) {
        try {
          httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf)
          setSuperField(this, "httpUGI", httpUGI)
        } catch {
          case e: IOException =>
            throw new ServiceException("Unable to login to spnego with given principal " +
              s"$principal and keytab $keyTabFile: $e", e)
        }
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(ss.sqlContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.asScala.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 5
Source File: SparkSQLSessionManager.scala from BigDatalog with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, hiveContext: HiveContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    // Create operation log root directory, if operation logging is enabled
    if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
      invoke(classOf[SessionManager], this, "initOperationLogRootDir")
    }

    val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS)
    setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize))
    getAncestorField[Log](this, 3, "LOG").info(
      s"HiveServer2: Async execution pool size $backgroundPoolSize")

    setSuperField(this, "operationManager", sparkSqlOperationManager)
    addService(sparkSqlOperationManager)
    initCompositeService(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val ctx = if (hiveContext.hiveThriftServerSingleSession) {
      hiveContext
    } else {
      hiveContext.newSession()
    }
    ctx.setConf("spark.sql.hive.version", HiveContext.hiveExecutionVersion)
    sparkSqlOperationManager.sessionToContexts += sessionHandle -> ctx
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool -= sessionHandle
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 6
Source File: SparkSQLCLIService.scala from BigDatalog with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import scala.collection.JavaConverters._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.shims.Utils
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, hiveContext: HiveContext)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, hiveContext)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null

    if (UserGroupInformation.isSecurityEnabled) {
      try {
        HiveAuthFactory.loginFromKeytab(hiveConf)
        sparkServiceUGI = Utils.getUGI()
        setSuperField(this, "serviceUGI", sparkServiceUGI)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(hiveContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.asScala.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 7
Source File: SparkSQLSessionManager.scala from Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "operationManager", sparkSqlOperationManager)
    super.init(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 8
Source File: SparkSQLCLIService.scala from Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import scala.collection.JavaConverters._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.shims.Utils
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null
    var httpUGI: UserGroupInformation = null

    if (UserGroupInformation.isSecurityEnabled) {
      try {
        HiveAuthFactory.loginFromKeytab(hiveConf)
        sparkServiceUGI = Utils.getUGI()
        setSuperField(this, "serviceUGI", sparkServiceUGI)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }

      // Try creating spnego UGI if it is configured.
      val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL).trim
      val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB).trim
      if (principal.nonEmpty && keyTabFile.nonEmpty) {
        try {
          httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf)
          setSuperField(this, "httpUGI", httpUGI)
        } catch {
          case e: IOException =>
            throw new ServiceException("Unable to login to spnego with given principal " +
              s"$principal and keytab $keyTabFile: $e", e)
        }
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.asScala.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 9
Source File: SparkSQLSessionManager.scala from spark1.52 with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, hiveContext: HiveContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager(hiveContext)

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS)
    // The pool's blocking queue holds tasks waiting to execute.
    // LinkedBlockingQueue is a linked-list-based FIFO blocking queue whose throughput is
    // usually higher than ArrayBlockingQueue's; Executors.newFixedThreadPool() uses it.
    setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize))
    getAncestorField[Log](this, 3, "LOG").info(
      s"HiveServer2: Async execution pool size $backgroundPoolSize")

    setSuperField(this, "operationManager", sparkSqlOperationManager)
    addService(sparkSqlOperationManager)
    initCompositeService(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    hiveContext.openSession()
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool -= sessionHandle
    hiveContext.detachSession()
  }
}
Example 10
Source File: SparkSQLCLIService.scala from spark1.52 with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.shims.Utils
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

import scala.collection.JavaConversions._

private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, hiveContext: HiveContext)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, hiveContext)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null

    if (UserGroupInformation.isSecurityEnabled) {
      try {
        HiveAuthFactory.loginFromKeytab(hiveConf)
        sparkServiceUGI = Utils.getUGI()
        setSuperField(this, "serviceUGI", sparkServiceUGI)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(hiveContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 11
Source File: SparkSQLCLIService.scala from iolap with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import scala.collection.JavaConversions._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.shims.ShimLoader
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.util.Utils

private[hive] class SparkSQLCLIService(hiveContext: HiveContext)
  extends CLIService
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManager(hiveContext)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null

    if (ShimLoader.getHadoopShims.isSecurityEnabled) {
      try {
        HiveAuthFactory.loginFromKeytab(hiveConf)
        sparkServiceUGI = ShimLoader.getHadoopShims.getUGIForConf(hiveConf)
        HiveThriftServerShim.setServerUserName(sparkServiceUGI, this)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(hiveContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 12
Source File: SparkSQLCLIService.scala from multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{List => JList}

import scala.collection.JavaConverters._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hive.service.{AbstractService, Service}
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

private[hive] class SparkSQLCLIService(hiveServer: HiveServer2)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    this.hiveConf = hiveConf
    this.sessionManager = new SparkSQLSessionManager(hiveServer)
    addService(sessionManager)
    this.serviceUGI = MultiSparkSQLEnv.globalUgi
    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(MultiSparkSQLEnv.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.asScala.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 13
Source File: IndexedBinaryBlockReader.scala from hail with MIT License
package is.hail.io

import is.hail.annotations.RegionValueBuilder
import is.hail.io.fs.{HadoopFS, WrappedSeekableDataInputStream}
import org.apache.commons.logging.{Log, LogFactory}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.mapred._

abstract class KeySerializedValueRecord[K] extends Serializable {
  var input: Array[Byte] = _
  var key: K = _

  def setSerializedValue(arr: Array[Byte]) {
    this.input = arr
  }

  def getValue(rvb: RegionValueBuilder, includeGT: Boolean): Unit

  def setKey(k: K) {
    this.key = k
  }

  def getKey: K = key
}

abstract class IndexedBinaryBlockReader[T](job: Configuration, split: FileSplit)
  extends RecordReader[LongWritable, T] {

  val LOG: Log = LogFactory.getLog(classOf[IndexedBinaryBlockReader[T]].getName)
  val partitionStart: Long = split.getStart
  var pos: Long = partitionStart
  val end: Long = partitionStart + split.getLength
  val bfis = openFile()

  def openFile(): HadoopFSDataBinaryReader = {
    val file: Path = split.getPath
    val fs: FileSystem = file.getFileSystem(job)
    val is = fs.open(file)
    new HadoopFSDataBinaryReader(
      new WrappedSeekableDataInputStream(
        HadoopFS.toSeekableInputStream(is)))
  }

  def createKey(): LongWritable = new LongWritable()

  def createValue(): T

  def getPos: Long = pos

  def getProgress: Float = {
    if (partitionStart == end)
      0.0f
    else
      Math.min(1.0f, (pos - partitionStart) / (end - partitionStart).toFloat)
  }

  def close() = bfis.close()
}
Example 14
Source File: RangerAdminClientImpl.scala from spark-ranger with Apache License 2.0
package org.apache.ranger.services.spark

import java.nio.file.{Files, FileSystems}
import java.util

import com.google.gson.GsonBuilder
import org.apache.commons.logging.{Log, LogFactory}
import org.apache.ranger.admin.client.RangerAdminRESTClient
import org.apache.ranger.plugin.util.{GrantRevokeRequest, ServicePolicies, ServiceTags}

class RangerAdminClientImpl extends RangerAdminRESTClient {
  private val LOG: Log = LogFactory.getLog(classOf[RangerAdminClientImpl])
  private val cacheFilename = "sparkSql_hive_jenkins.json"
  private val gson =
    new GsonBuilder().setDateFormat("yyyyMMdd-HH:mm:ss.SSS-Z").setPrettyPrinting().create

  override def init(serviceName: String, appId: String, configPropertyPrefix: String): Unit = {}

  override def getServicePoliciesIfUpdated(
      lastKnownVersion: Long,
      lastActivationTimeInMillis: Long): ServicePolicies = {
    val basedir = this.getClass.getProtectionDomain.getCodeSource.getLocation.getPath
    val cachePath = FileSystems.getDefault.getPath(basedir, cacheFilename)
    LOG.info("Reading policies from " + cachePath)
    val bytes = Files.readAllBytes(cachePath)
    gson.fromJson(new String(bytes), classOf[ServicePolicies])
  }

  override def grantAccess(request: GrantRevokeRequest): Unit = {}

  override def revokeAccess(request: GrantRevokeRequest): Unit = {}

  override def getServiceTagsIfUpdated(
      lastKnownVersion: Long,
      lastActivationTimeInMillis: Long): ServiceTags = null

  override def getTagTypes(tagTypePattern: String): util.List[String] = null
}
Example 15
Source File: SparkSQLCLIService.scala from drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import scala.collection.JavaConverters._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.shims.Utils
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null

    if (UserGroupInformation.isSecurityEnabled) {
      try {
        HiveAuthFactory.loginFromKeytab(hiveConf)
        sparkServiceUGI = Utils.getUGI()
        setSuperField(this, "serviceUGI", sparkServiceUGI)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.asScala.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 16
Source File: SparkSQLCLIService.scala from sparkoscope with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import scala.collection.JavaConverters._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.shims.Utils
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null

    if (UserGroupInformation.isSecurityEnabled) {
      try {
        HiveAuthFactory.loginFromKeytab(hiveConf)
        sparkServiceUGI = Utils.getUGI()
        setSuperField(this, "serviceUGI", sparkServiceUGI)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.asScala.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 17
Source File: ParentTest.scala from Soteria with MIT License
package com.leobenkel.soteria

import org.apache.commons.logging.{Log, LogFactory}
import org.scalactic.source.Position
import org.scalatest.{FunSuite, Tag}

trait ParentTest extends FunSuite {
  lazy val log: Log = LogFactory.getLog(this.getClass)

  protected def assertEquals[T](
    expected: T,
    result: T
  )(
    implicit pos: Position
  ): Unit = {
    assertResult(expected)(result)
    ()
  }

  override protected def test(
    testName: String,
    testTags: Tag*
  )(
    testFun: => Any
  )(
    implicit pos: Position
  ): Unit = {
    super.test(testName, testTags: _*) {
      log.debug(s">>> Starting - $testName")
      testFun
    }
  }

  def time[R](block: => R): (R, Long) = {
    val t0 = System.nanoTime()
    val result = block
    val t1 = System.nanoTime()
    val time_ns: Long = t1 - t0
    (result, time_ns)
  }
}
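The time helper above returns the block's value together with its wall-clock duration in nanoseconds. A typical call from a suite that mixes in ParentTest might look like this (the measured expression is illustrative):

val (result, elapsedNs) = time {
  (1 to 1000000).sum // any expression whose running time we want to measure
}
log.debug(s"computed $result in ${elapsedNs / 1000000} ms")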
Example 18
Source File: ExcelRelation.scala from spark-hadoopoffice-ds with Apache License 2.0
package org.zuinnote.spark.office.excel

import scala.collection.JavaConversions._

import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.types.ArrayType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql._
import org.apache.spark.rdd.RDD
import org.apache.hadoop.conf._
import org.apache.hadoop.mapreduce._
import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.dao._
import org.zuinnote.hadoop.office.format.mapreduce._
import org.zuinnote.spark.office.excel.util.ExcelFile

// NOTE: the enclosing ExcelRelation class declaration (with its sqlContext, schema,
// location and hadoopParams members) is elided in this excerpt; only buildScan is shown.
override def buildScan: RDD[Row] = {
  // read ExcelRows
  val excelRowsRDD = ExcelFile.load(sqlContext, location, hadoopParams)
  // map to schema
  val schemaFields = schema.fields
  excelRowsRDD.flatMap(excelKeyValueTuple => {
    // map the Excel row data structure to a Spark SQL schema
    val rowArray = new Array[Any](excelKeyValueTuple._2.get.length)
    var i = 0
    for (x <- excelKeyValueTuple._2.get) {
      // parse through the SpreadSheetCellDAO
      val spreadSheetCellDAOStructArray = new Array[String](schemaFields.length)
      val currentSpreadSheetCellDAO: Array[SpreadSheetCellDAO] =
        excelKeyValueTuple._2.get.asInstanceOf[Array[SpreadSheetCellDAO]]
      spreadSheetCellDAOStructArray(0) = currentSpreadSheetCellDAO(i).getFormattedValue
      spreadSheetCellDAOStructArray(1) = currentSpreadSheetCellDAO(i).getComment
      spreadSheetCellDAOStructArray(2) = currentSpreadSheetCellDAO(i).getFormula
      spreadSheetCellDAOStructArray(3) = currentSpreadSheetCellDAO(i).getAddress
      spreadSheetCellDAOStructArray(4) = currentSpreadSheetCellDAO(i).getSheetName
      // add row representing one Excel row
      rowArray(i) = spreadSheetCellDAOStructArray
      i += 1
    }
    Some(Row.fromSeq(rowArray))
  })
} // closing brace of the elided ExcelRelation class follows in the full source
Example 19
Source File: HadoopFileExcelReader.scala from spark-hadoopoffice-ds with Apache License 2.0
package org.zuinnote.spark.office.excel

import java.io.Closeable
import java.net.URI

import org.apache.spark.sql.execution.datasources._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce._
import org.apache.hadoop.mapreduce.lib.input.{FileSplit, LineRecordReader}
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
import org.apache.spark.sql.execution.datasources.RecordReaderIterator
import org.zuinnote.hadoop.office.format.mapreduce.ExcelFileInputFormat
import org.zuinnote.hadoop.office.format.mapreduce.ExcelRecordReader
import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log

class HadoopFileExcelReader(file: PartitionedFile, conf: Configuration)
  extends Iterator[ArrayWritable] with Closeable {

  val LOG = LogFactory.getLog(classOf[HadoopFileExcelReader])

  private var reader: RecordReader[Text, ArrayWritable] = null

  private val iterator = {
    val fileSplit = new FileSplit(
      new Path(new URI(file.filePath)),
      file.start,
      file.length,
      Array.empty) // todo: implement locality (replace Array.empty with the locations)
    val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
    val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId)
    val inputFormat = new ExcelFileInputFormat()
    reader = inputFormat.createRecordReader(fileSplit, hadoopAttemptContext)
    reader.initialize(fileSplit, hadoopAttemptContext)
    new RecordReaderIterator(reader)
  }

  def getReader: RecordReader[Text, ArrayWritable] = reader

  override def hasNext: Boolean = iterator.hasNext

  override def next(): ArrayWritable = iterator.next()

  override def close(): Unit = {
    if (reader != null) {
      reader.close()
    }
  }
}
Example 20
Source File: ExcelOutputWriter.scala from spark-hadoopoffice-ds with Apache License 2.0
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.text.DecimalFormat
import java.text.NumberFormat
import java.util.Calendar
import java.util.Locale

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._
import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.mapreduce._
import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
// The constructor body that initializes recordWriter, useHeader, currentRowNum,
// defaultSheetName and simpleConverter is elided in this excerpt.
private[excel] class ExcelOutputWriter(
    path: String,
    dataSchema: StructType,
    context: TaskAttemptContext,
    options: Map[String, String]) extends OutputWriter {

  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD =
          new SpreadSheetCellDAO(x, "", "",
            MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size > 0) {
      var currentColumnNum = 0
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length == 5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) =
            new SpreadSheetCellDAO(formattedValue, comment, formula, address, sheetName)
        } else {
          simpleObject(i) = obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow =
        simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName,
          currentRowNum)
      // write it
      for (x <- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0
  }
}
Example 21
Source File: SparkSQLSessionManager.scala from XSQL with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "operationManager", sparkSqlOperationManager)
    super.init(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 22
Source File: SparkSQLCLIService.scala from XSQL with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.io.IOException
import java.util.{List => JList}
import javax.security.auth.login.LoginException

import scala.collection.JavaConverters._

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.shims.Utils
import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation}
import org.apache.hive.service.{AbstractService, Service, ServiceException}
import org.apache.hive.service.Service.STATE
import org.apache.hive.service.auth.HiveAuthFactory
import org.apache.hive.service.cli._
import org.apache.hive.service.server.HiveServer2
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._

private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends CLIService(hiveServer)
  with ReflectedCompositeService {

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext)
    setSuperField(this, "sessionManager", sparkSqlSessionManager)
    addService(sparkSqlSessionManager)

    var sparkServiceUGI: UserGroupInformation = null
    var httpUGI: UserGroupInformation = null

    if (UserGroupInformation.isSecurityEnabled) {
      try {
        val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL)
        val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB)
        if (principal.isEmpty || keyTabFile.isEmpty) {
          throw new IOException(
            "HiveServer2 Kerberos principal or keytab is not correctly configured")
        }

        val originalUgi = UserGroupInformation.getCurrentUser
        sparkServiceUGI = if (HiveAuthFactory.needUgiLogin(originalUgi,
          SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile)) {
          HiveAuthFactory.loginFromKeytab(hiveConf)
          Utils.getUGI()
        } else {
          originalUgi
        }
        setSuperField(this, "serviceUGI", sparkServiceUGI)
      } catch {
        case e @ (_: IOException | _: LoginException) =>
          throw new ServiceException("Unable to login to kerberos with given principal/keytab", e)
      }

      // Try creating spnego UGI if it is configured.
      val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL).trim
      val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB).trim
      if (principal.nonEmpty && keyTabFile.nonEmpty) {
        try {
          httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf)
          setSuperField(this, "httpUGI", httpUGI)
        } catch {
          case e: IOException =>
            throw new ServiceException("Unable to login to spnego with given principal " +
              s"$principal and keytab $keyTabFile: $e", e)
        }
      }
    }

    initCompositeService(hiveConf)
  }

  override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = {
    getInfoType match {
      case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL")
      case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version)
      case _ => super.getInfo(sessionHandle, getInfoType)
    }
  }
}

private[thriftserver] trait ReflectedCompositeService { this: AbstractService =>
  def initCompositeService(hiveConf: HiveConf) {
    // Emulating `CompositeService.init(hiveConf)`
    val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList")
    serviceList.asScala.foreach(_.init(hiveConf))

    // Emulating `AbstractService.init(hiveConf)`
    invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED)
    setAncestorField(this, 3, "hiveConf", hiveConf)
    invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED)
    getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.")
  }
}
Example 23
Source File: RandomSearch.scala from automl with Apache License 2.0
package com.tencent.angel.spark.automl.tuner.acquisition.optimizer

import com.tencent.angel.spark.automl.tuner.TunerParam
import com.tencent.angel.spark.automl.tuner.acquisition.Acquisition
import com.tencent.angel.spark.automl.tuner.config.{Configuration, ConfigurationSpace}
import org.apache.commons.logging.{Log, LogFactory}

import scala.util.Random

class RandomSearch(
    override val acqFunc: Acquisition,
    override val configSpace: ConfigurationSpace,
    seed: Int = 100) extends AcqOptimizer(acqFunc, configSpace) {

  val LOG: Log = LogFactory.getLog(classOf[RandomSearch])

  val rd = new Random(seed)

  override def maximize(numPoints: Int, sorted: Boolean = true): Array[(Double, Configuration)] = {
    //println(s"maximize RandomSearch")
    val configs: Array[Configuration] = configSpace.sample(TunerParam.sampleSize)
    if (configs.isEmpty) {
      Array[(Double, Configuration)]()
    } else {
      //configs.foreach { config =>
      //  println(s"sample a configuration: ${config.getVector.toArray.mkString(",")}")
      //}
      val retConfigs = if (sorted) {
        configs.map { config =>
          (acqFunc.compute(config.getVector)._1, config)
        }.sortWith(_._1 > _._1).take(numPoints)
      } else {
        rd.shuffle(configs.map { config =>
          (acqFunc.compute(config.getVector)._1, config)
        }.toTraversable).take(numPoints).toArray
      }
      retConfigs
    }
  }

  override def maximize: (Double, Configuration) = {
    maximize(1, true).head
  }
}
Example 24
package com.tencent.angel.spark.automl.tuner.acquisition

import com.tencent.angel.spark.automl.tuner.surrogate.Surrogate
import org.apache.commons.logging.{Log, LogFactory}
import org.apache.commons.math3.distribution.NormalDistribution
import org.apache.spark.ml.linalg.{Vector, Vectors}

class EI(
    override val surrogate: Surrogate,
    val par: Double) extends Acquisition(surrogate) {

  val LOG: Log = LogFactory.getLog(classOf[Surrogate])

  override def compute(X: Vector, derivative: Boolean = false): (Double, Vector) = {
    val pred = surrogate.predict(X) // (mean, variance)

    // Use the best seen observation as incumbent
    val eta: Double = surrogate.curBest._2
    //println(s"best seen result: $eta")

    val m: Double = pred._1
    val s: Double = Math.sqrt(pred._2)
    //println(s"${X.toArray.mkString("(", ",", ")")}: mean[$m], variance[$s]")

    if (s == 0) {
      // if std is zero, we have observed x on all instances
      // using a RF, std should be never exactly 0.0
      (0.0, Vectors.dense(new Array[Double](X.size)))
    } else {
      val z = (pred._1 - eta - par) / s
      val norm: NormalDistribution = new NormalDistribution
      val cdf: Double = norm.cumulativeProbability(z)
      val pdf: Double = norm.density(z)
      val ei = s * (z * cdf + pdf)
      //println(s"EI of ${X.toArray.mkString("(", ",", ")")}: $ei, cur best: $eta, z: $z, cdf: $cdf, pdf: $pdf")
      (ei, Vectors.dense(new Array[Double](X.size)))
    }
  }
}
Example 25
package com.tencent.angel.spark.automl.tuner.acquisition

import com.tencent.angel.spark.automl.tuner.surrogate.Surrogate
import org.apache.commons.logging.{Log, LogFactory}
import org.apache.spark.ml.linalg.{Vector, Vectors}

class UCB(
    override val surrogate: Surrogate,
    val beta: Double = 100) extends Acquisition(surrogate) {

  // As in EI, the original logged against classOf[Surrogate]; use this class.
  val LOG: Log = LogFactory.getLog(classOf[UCB])

  override def compute(X: Vector, derivative: Boolean = false): (Double, Vector) = {
    val pred = surrogate.predict(X) // (mean, variance)
    val m: Double = pred._1
    val s: Double = Math.sqrt(pred._2)

    if (s == 0) {
      // If the predictive std is zero, x has effectively been fully observed.
      // With a random-forest surrogate the std should never be exactly 0.0.
      (0.0, Vectors.dense(new Array[Double](X.size)))
    } else {
      val ucb = m + beta * s
      (ucb, Vectors.dense(new Array[Double](X.size)))
    }
  }
}
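The trade-off controlled by beta is easy to see in isolation: small values rank candidates almost purely by predicted mean (exploitation), while large values favour high predictive uncertainty (exploration). An illustrative, self-contained sketch with invented numbers:

object UcbBetaDemo extends App {
  // (name, predicted mean, predicted std) for two hypothetical candidates.
  val candidates = Seq(("a", 0.90, 0.01), ("b", 0.50, 0.30))

  for (beta <- Seq(0.0, 1.0, 100.0)) {
    val pick = candidates.maxBy { case (_, m, s) => m + beta * s }._1
    println(s"beta=$beta -> pick $pick") // beta 0.0 and 1.0 pick "a"; 100.0 picks "b"
  }
}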
Example 26
Source File: Surrogate.scala From automl with Apache License 2.0 | 5 votes |
package com.tencent.angel.spark.automl.tuner.surrogate

import com.tencent.angel.spark.automl.tuner.config.ConfigurationSpace
import org.apache.commons.logging.{Log, LogFactory}
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}

import scala.collection.mutable.ArrayBuffer

// NOTE: the class declaration and the `minimize`/`preX`/`preY` members were
// missing from this snippet; they are reconstructed here (an assumption) so
// that the methods below compile.
abstract class Surrogate(val configSpace: ConfigurationSpace, val minimize: Boolean = true) {

  // Observed points and their objective values. When minimizing, the values
  // in `preY` are stored negated so that "larger is better" holds internally.
  val preX: ArrayBuffer[Vector] = new ArrayBuffer[Vector]()
  val preY: ArrayBuffer[Double] = new ArrayBuffer[Double]()

  def predict(X: Vector): (Double, Double)

  def stop(): Unit

  def curBest: (Vector, Double) = {
    if (minimize) curMin else curMax
  }

  def curMin: (Vector, Double) = {
    if (preY.isEmpty) (null, Double.MaxValue)
    else {
      // `preY` holds negated values when minimizing, so its maximum entry
      // corresponds to the smallest original objective; negate it back.
      val maxIdx: Int = preY.zipWithIndex.max._2
      (preX(maxIdx), -preY(maxIdx))
    }
  }

  def curMax: (Vector, Double) = {
    if (preY.isEmpty) (null, Double.MinValue)
    else {
      val maxIdx: Int = preY.zipWithIndex.max._2
      (preX(maxIdx), preY(maxIdx))
    }
  }
}
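A minimal concrete subclass, assuming the reconstructed constructor above; ConstantSurrogate is a hypothetical name used only to show the contract (predict returning a (mean, variance) pair):

import com.tencent.angel.spark.automl.tuner.config.ConfigurationSpace
import com.tencent.angel.spark.automl.tuner.surrogate.Surrogate
import org.apache.spark.ml.linalg.Vector

// HYPOTHETICAL subclass for illustration: predicts the sample mean and
// variance of everything observed so far, ignoring the query point.
class ConstantSurrogate(configSpace: ConfigurationSpace, minimize: Boolean = true)
  extends Surrogate(configSpace, minimize) {

  override def predict(X: Vector): (Double, Double) = {
    if (preY.isEmpty) (0.0, 1.0)
    else {
      val mean = preY.sum / preY.size
      val variance = preY.map(y => (y - mean) * (y - mean)).sum / preY.size
      (mean, variance)
    }
  }

  override def stop(): Unit = {}
}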
Example 27
Source File: SpotlightLog.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.log

import org.apache.commons.logging.{Log, LogFactory}

import scala.collection.mutable

trait SpotlightLog[T] {
  def _debug(c: Class[_], msg: T, args: Any*)
  def _info(c: Class[_], msg: T, args: Any*)
  def _error(c: Class[_], msg: T, args: Any*)
  def _fatal(c: Class[_], msg: T, args: Any*)
  def _trace(c: Class[_], msg: T, args: Any*)
  def _warn(c: Class[_], msg: T, args: Any*)
}

object SpotlightLog {
  def debug[T](c: Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) =
    instance._debug(c, msg, args: _*)
  def info[T](c: Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) =
    instance._info(c, msg, args: _*)
  def error[T](c: Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) =
    instance._error(c, msg, args: _*)
  def fatal[T](c: Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) =
    instance._fatal(c, msg, args: _*)
  def trace[T](c: Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) =
    instance._trace(c, msg, args: _*)
  def warn[T](c: Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) =
    instance._warn(c, msg, args: _*)

  implicit object StringSpotlightLog extends SpotlightLog[String] {
    // One commons-logging Log per calling class, created lazily.
    val loggers = new mutable.HashMap[Class[_], Log]()

    def _debug(c: Class[_], msg: String, args: Any*) = {
      val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c))
      if (log.isDebugEnabled) {
        if (args.isEmpty) log.debug(msg)
        else log.debug(msg.format(args: _*))
      }
    }
    def _info(c: Class[_], msg: String, args: Any*) = {
      val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c))
      if (log.isInfoEnabled) {
        if (args.isEmpty) log.info(msg)
        else log.info(msg.format(args: _*))
      }
    }
    def _error(c: Class[_], msg: String, args: Any*) = {
      val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c))
      if (log.isErrorEnabled) {
        if (args.isEmpty) log.error(msg)
        else log.error(msg.format(args: _*))
      }
    }
    def _fatal(c: Class[_], msg: String, args: Any*) = {
      val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c))
      if (log.isFatalEnabled) {
        if (args.isEmpty) log.fatal(msg)
        else log.fatal(msg.format(args: _*))
      }
    }
    def _trace(c: Class[_], msg: String, args: Any*) = {
      val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c))
      if (log.isTraceEnabled) {
        if (args.isEmpty) log.trace(msg)
        else log.trace(msg.format(args: _*))
      }
    }
    def _warn(c: Class[_], msg: String, args: Any*) = {
      val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c))
      if (log.isWarnEnabled) {
        if (args.isEmpty) log.warn(msg)
        else log.warn(msg.format(args: _*))
      }
    }
  }
}
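Because the API is a type class, call sites stay uniform while the implicit instance decides how to render the message. A usage sketch (LoggingUsage and the messages are invented for illustration):

import org.dbpedia.spotlight.log.SpotlightLog

object LoggingUsage extends App {
  // Resolves the implicit StringSpotlightLog; formatting happens only when
  // args are supplied and the level is enabled.
  SpotlightLog.info(getClass, "Loaded %d spotters in %d ms", 3, 250)
  SpotlightLog.debug(getClass, "plain message, no formatting")
}

Caching a Log per class in loggers avoids repeated LogFactory.getLog lookups, though the plain mutable.HashMap is not thread-safe, a known limitation of this simple cache.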
Example 28
Source File: SparkSQLSessionManager.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.{HiveSessionState, HiveUtils}
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    // Create the operation log root directory, if operation logging is enabled.
    if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
      invoke(classOf[SessionManager], this, "initOperationLogRootDir")
    }

    val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS)
    setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize))
    getAncestorField[Log](this, 3, "LOG").info(
      s"HiveServer2: Async execution pool size $backgroundPoolSize")

    setSuperField(this, "operationManager", sparkSqlOperationManager)
    addService(sparkSqlOperationManager)
    initCompositeService(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)

    val sessionState = sqlContext.sessionState.asInstanceOf[HiveSessionState]
    // Share one SQLContext across all sessions, or give each session its own.
    val ctx = if (sessionState.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf("spark.sql.hive.version", HiveUtils.hiveExecutionVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
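The hiveThriftServerSingleSession branch decides whether JDBC sessions share one SQLContext (shared temp views and UDFs) or each get an isolated one. In Spark 2.x this is driven by the spark.sql.hive.thriftServer.singleSession configuration; a minimal sketch of setting it when building the session:

import org.apache.spark.sql.SparkSession

// Illustrative only: share a single SQLContext across all Thrift server sessions.
val spark = SparkSession.builder()
  .config("spark.sql.hive.thriftServer.singleSession", "true")
  .enableHiveSupport()
  .getOrCreate()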