org.apache.spark.sql.catalyst.analysis.FunctionRegistry Scala Examples

The following examples show how to use org.apache.spark.sql.catalyst.analysis.FunctionRegistry. Each example is taken from an open source project; the source file and project are noted above the code.
Example 1
Source File: NativeFunctionRegistration.scala    From spark-alchemy   with Apache License 2.0
package com.swoop.alchemy.spark.expressions

import org.apache.spark.sql.EncapsulationViolator.createAnalysisException
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, ExpressionInfo, RuntimeReplaceable}

import scala.reflect.ClassTag
import scala.util.{Failure, Success, Try}

// based on Spark's FunctionRegistry @ossSpark
trait NativeFunctionRegistration extends FunctionRegistration {

  type FunctionBuilder = Seq[Expression] => Expression

  def expressions: Map[String, (ExpressionInfo, FunctionBuilder)]


  def registerFunctions(fr: FunctionRegistry): Unit = {
    expressions.foreach { case (name, (info, builder)) => fr.registerFunction(FunctionIdentifier(name), info, builder) }
  }

  def registerFunctions(spark: SparkSession): Unit = {
    registerFunctions(spark.sessionState.functionRegistry)
  }

  
  protected def expressionInfo[T <: Expression : ClassTag](name: String): ExpressionInfo = {
    val clazz = scala.reflect.classTag[T].runtimeClass
    val df = clazz.getAnnotation(classOf[ExpressionDescription])
    if (df != null) {
      new ExpressionInfo(clazz.getCanonicalName, null, name, df.usage(), df.extended())
    } else {
      new ExpressionInfo(clazz.getCanonicalName, name)
    }
  }

} 
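
For reference, a concrete registration built on this trait could look like the sketch below. It is not part of spark-alchemy: the object name MyFunctionRegistration and the SQL name "my_upper" are hypothetical, and Spark's built-in Upper expression is reused purely for illustration.

import com.swoop.alchemy.spark.expressions.NativeFunctionRegistration
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Upper}

// Minimal sketch: exposes Spark's built-in Upper expression under a custom SQL name.
object MyFunctionRegistration extends NativeFunctionRegistration {

  val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map(
    "my_upper" -> ((expressionInfo[Upper]("my_upper"), (args: Seq[Expression]) => Upper(args.head)))
  )
}

// Usage, assuming an active SparkSession named `spark`:
//   MyFunctionRegistration.registerFunctions(spark)
//   spark.sql("SELECT my_upper('hello')").show()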
Example 2
Source File: PythonSQLUtils.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.api.python

import java.io.InputStream
import java.nio.channels.Channels

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.python.PythonRDDServer
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils {
  def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)

  // This is needed when generating SQL documentation for built-in functions.
  def listBuiltinFunctionInfos(): Array[ExpressionInfo] = {
    FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
  }
}

private[sql] class ArrowRDDServer(sqlContext: SQLContext) extends PythonRDDServer {

  override protected def streamToRDD(input: InputStream): RDD[Array[Byte]] = {
    // Create array to consume iterator so that we can safely close the inputStream
    val batches = ArrowConverters.getBatchesFromStream(Channels.newChannel(input)).toArray
    // Parallelize the record batches to create an RDD
    JavaRDD.fromRDD(sqlContext.sparkContext.parallelize(batches, batches.length))
  }

} 
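
listBuiltinFunctionInfos is the hook Spark's SQL documentation generator calls through Py4J. A minimal sketch of consuming it from the Scala side follows; the object name PrintBuiltinFunctionDocs is hypothetical, and because PythonSQLUtils is private[sql] the code has to be compiled inside the org.apache.spark.sql package tree.

package org.apache.spark.sql.api.python

// Minimal sketch: print the name and usage text of every built-in function.
// getName and getUsage are standard ExpressionInfo accessors.
object PrintBuiltinFunctionDocs {
  def main(args: Array[String]): Unit = {
    PythonSQLUtils.listBuiltinFunctionInfos()
      .sortBy(_.getName)
      .foreach(info => println(s"${info.getName}: ${info.getUsage}"))
  }
}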
Example 3
Source File: SQLContextExtensionBase.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql.extension

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.{ParserDialect, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, SimpleFunctionRegistry}
import org.apache.spark.sql.catalyst.errors.DialectException
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.DDLParser
import org.apache.spark.sql.extension.OptimizerFactory.ExtendableOptimizerBatch
import org.apache.spark.util.Utils

import scala.util.Try
import scala.util.control.NonFatal

// Excerpt: the enclosing trait declaration was trimmed by the example extractor and is
// sketched here (assumed from the original file) so the braces balance; members such as
// dialectClassName, sqlParser, and conf are defined in parts of the file not shown.
private[sql] trait SQLContextExtensionBase extends SQLContextExtension {
  self: SQLContext =>
  override protected def extendedParserDialect: ParserDialect =
    try {
      val clazz = Utils.classForName(dialectClassName)
      clazz.newInstance().asInstanceOf[ParserDialect]
    } catch {
      case NonFatal(e) =>
        // Since we didn't find the available SQL Dialect, it will fail even for SET command:
        // SET spark.sql.dialect=sql; Let's reset as default dialect automatically.
        val dialect = conf.dialect
        // reset the sql dialect
        conf.unsetConf(SQLConf.DIALECT)
        // throw out the exception, and the default sql dialect will take effect for next query.
        throw new DialectException(
          s"""
              |Instantiating dialect '$dialect' failed.
              |Reverting to default dialect '${conf.dialect}'""".stripMargin, e)
    }

  // (suggestion) make this implicit to FunctionRegistry.
  protected def registerBuiltins(registry: FunctionRegistry): Unit = {
    FunctionRegistry.expressions.foreach {
      case (name, (info, builder)) => registry.registerFunction(name, builder)
    }
  }

  override protected def extendedDdlParser(parser: String => LogicalPlan): DDLParser =
    new DDLParser(sqlParser.parse(_))

  override protected def registerFunctions(registry: FunctionRegistry): Unit = { }

} 
Example 4
Source File: RegisterHierarchyFunctions.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions._

import FunctionBuilders._

private[sql] object RegisterHierarchyFunctions {

  def apply(functionRegistry: FunctionRegistry): Unit = {
    val r = (name: String, builder: ExpressionBuilder) =>
      functionRegistry.registerFunction(name, builder)
    r("level", unaryExpression[Level])
    r("post_rank", unaryExpression[PostRank])
    r("pre_rank", unaryExpression[PreRank])
    r("is_root", unaryExpression[IsRoot])
    r("is_leaf", unaryExpression[IsLeaf])
    r("name", unaryExpression[Name])
    r("is_descendant", binaryExpression[IsDescendant])
    r("is_descendant_or_self", binaryExpression[IsDescendantOrSelf])
    r("is_ancestor", reverse(binaryExpression[IsDescendant]))
    r("is_ancestor_or_self", reverse(binaryExpression[IsDescendantOrSelf]))
    r("is_parent", binaryExpression[IsParent])
    r("is_child", reverse(binaryExpression[IsParent]))
    r("is_sibling", binaryExpression[IsSibling])
    r("is_self", binaryExpression[IsSelf])
    r("is_sibling_or_self", binaryExpression[IsSiblingOrSelf])
    r("is_following", binaryExpression[IsFollowing])
    r("is_preceding", reverse(binaryExpression[IsFollowing]))
  }

} 
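
These registrations target the Spark 1.x-era registerFunction(name, builder) API used throughout this project. A minimal sketch of applying them to a standalone registry and resolving one function follows; the object name HierarchyFunctionsDemo is hypothetical, and it must sit in the org.apache.spark.sql package because RegisterHierarchyFunctions is private[sql].

package org.apache.spark.sql

import org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry
import org.apache.spark.sql.catalyst.expressions.Literal

// Minimal sketch: register the hierarchy builders into a fresh registry and resolve one.
// "is_ancestor" was registered via reverse(...), so the two arguments are swapped before
// being handed to the IsDescendant builder.
object HierarchyFunctionsDemo {
  def main(args: Array[String]): Unit = {
    val registry = new SimpleFunctionRegistry
    RegisterHierarchyFunctions(registry)
    val expr = registry.lookupFunction("is_ancestor", Seq(Literal(1), Literal(2)))
    println(expr)
  }
}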
Example 5
Source File: RegisterCustomFunctions.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.currency.CurrencyConversionFunction
import org.apache.spark.sql.types._
import scala.reflect.ClassTag

// Excerpt: the enclosing object declaration was trimmed by the example extractor and is
// restored here so the braces balance.
private[sql] object RegisterCustomFunctions {
  // TODO move this to an implicit function in the registry.
  private[this] def registerExpression[T <: Expression](registry: FunctionRegistry, name: String)
                                         (implicit tag: ClassTag[T]): Unit = {
    val (_, (_, builder)) = expression[T](name)
    registry.registerFunction(name, builder)
  }

  def apply(registry: FunctionRegistry): Unit = {
    registerExpression[Remainder](registry, "remainder")
    registerExpression[Remainder](registry, "mod")
    registerExpression[AddYears](registry, "add_years")
    registerExpression[AddSeconds](registry, "add_seconds")
    registerExpression[DateAdd](registry, "add_days")
    registerExpression[Replace](registry, "replace")
    registerExpression[Log](registry, "ln")
    registry.registerFunction("to_double", toDoubleBuilder)
    registry.registerFunction("to_integer", toIntegerBuilder)
    registry.registerFunction("to_varchar", toVarcharBuilder)
    registry.registerFunction("rand", randBuilder)
    registry.registerFunction("days_between", daysBetweenBuilder)

    // register all currency conversions
    CurrencyConversionFunction.functions.foreach {
      case (name, impl) => registry.registerFunction(name, impl.getExpression)
    }
  }

  private def toDoubleBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp) => Cast(exp, DoubleType)
      case _ =>
        throw new AnalysisException("Input argument to TO_DOUBLE must be a single expression")
    }

  private def toIntegerBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp) => Cast(exp, IntegerType)
      case _ =>
        throw new AnalysisException("Input argument to TO_INTEGER must be a single expression")
    }

  private def toVarcharBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp) => Cast(exp, StringType)
      case _ =>
        throw new AnalysisException("Input argument to TO_VARCHAR must be a single expression")
    }

  private def randBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Nil => new Rand()
      case Seq(IntegerLiteral(n)) => new Rand(n)
      case _ => throw new AnalysisException("Input argument to RAND must be an integer literal.")
    }

  private def daysBetweenBuilder(expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq(exp1, exp2) => Abs(DateDiff(exp1, exp2))
      case _ =>
        throw new AnalysisException("Input argument to DAYS_BETWEEN must be two expressions.")
    }

} 
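
Once wired into the registry behind a SQLContext, these functions become callable from SQL, for example SELECT to_double(price), days_between(d1, d2) FROM orders. A minimal sketch of exercising one builder directly follows; the object name CustomFunctionsDemo is hypothetical and assumes the same Spark 1.x-era API as the example above.

package org.apache.spark.sql

import org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry
import org.apache.spark.sql.catalyst.expressions.Literal

// Minimal sketch: register the custom builders, then resolve to_double, which yields
// Cast(Literal("3.14"), DoubleType) per toDoubleBuilder above.
object CustomFunctionsDemo {
  def main(args: Array[String]): Unit = {
    val registry = new SimpleFunctionRegistry
    RegisterCustomFunctions(registry)
    val toDouble = registry.lookupFunction("to_double", Seq(Literal("3.14")))
    println(toDouble)
    // Passing more than one argument would trigger the AnalysisException from toDoubleBuilder.
  }
}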
Example 6
Source File: PythonSQLUtils.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.api.python

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.types.DataType

private[sql] object PythonSQLUtils {
  def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)

  // This is needed when generating SQL documentation for built-in functions.
  def listBuiltinFunctionInfos(): Array[ExpressionInfo] = {
    FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
  }

  
  def arrowPayloadToDataFrame(
      payloadRDD: JavaRDD[Array[Byte]],
      schemaString: String,
      sqlContext: SQLContext): DataFrame = {
    ArrowConverters.toDataFrame(payloadRDD, schemaString, sqlContext)
  }
}