org.apache.spark.ml.PredictionModel Scala Examples

The following examples show how to use org.apache.spark.ml.PredictionModel. The originating project, source file, and license are noted above each example.
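PredictionModel is the abstract base class of every fitted Spark ML predictor (for example LinearRegressionModel or RandomForestClassificationModel): it reads the column named by featuresCol and appends a Double prediction under predictionCol. As a quick orientation, here is a minimal sketch of that contract using plain Spark ML; the demo object, toy data, and column names are illustrative and do not come from the projects below.

import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.apache.spark.sql.SparkSession

object PredictionModelDemo extends App {
  val spark = SparkSession.builder().master("local[*]").appName("prediction-model-demo").getOrCreate()
  import spark.implicits._

  // Toy training data using the Spark ML default column names ("label", "features").
  val train = Seq(
    (1.0, Vectors.dense(0.0, 1.1)),
    (0.0, Vectors.dense(2.0, 1.0)),
    (1.5, Vectors.dense(0.5, 2.3))
  ).toDF("label", "features")

  // Fitting a Predictor produces a PredictionModel subclass.
  val model: LinearRegressionModel = new LinearRegression().fit(train)

  // transform() appends the Double prediction column configured by predictionCol.
  model.transform(train).select(model.getFeaturesCol, model.getPredictionCol).show()

  spark.stop()
}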
Example 1
Source File: LocalPredictionModel.scala (from spark-ml-serving, Apache License 2.0)
package io.hydrosphere.spark_ml_serving.common

import org.apache.spark.ml.PredictionModel
import org.apache.spark.ml.linalg.Vector

import scala.reflect.ClassTag

abstract class LocalPredictionModel[T <: PredictionModel[Vector, T]] extends LocalTransformer[T] {
  // Reflectively invoke the wrapped Spark model's predict on a single feature row.
  def predict(v: List[Double]): Double = invoke[Double]('predict, v)

  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case Some(column) =>
        val predictionCol = LocalDataColumn(
          sparkTransformer.getPredictionCol,
          column.data.map(_.asInstanceOf[List[Double]]).map(predict)
        )
        localData.withColumn(predictionCol)
      case None => localData
    }
  }
} 
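A concrete local wrapper then only has to pin the Spark model type; prediction and transformation are inherited. A hypothetical sketch, assuming LocalTransformer (not shown in this excerpt) declares sparkTransformer as an abstract member:

import io.hydrosphere.spark_ml_serving.common.LocalPredictionModel
import org.apache.spark.ml.regression.LinearRegressionModel

// Hypothetical wrapper: predict/transform come from LocalPredictionModel above;
// only the wrapped Spark model type is fixed here.
class LocalLinearRegressionModel(override val sparkTransformer: LinearRegressionModel)
  extends LocalPredictionModel[LinearRegressionModel]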
Example 2
Source File: GaussianProcessCommons.scala (from spark-gp, Apache License 2.0)
package org.apache.spark.ml.commons

import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import breeze.optimize.LBFGSB
import org.apache.spark.ml.commons.kernel.{EyeKernel, Kernel, _}
import org.apache.spark.ml.commons.util.DiffFunctionMemoized
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.util.Instrumentation
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.{Dataset, Row}

private[ml] trait GaussianProcessCommons[F, E <: Predictor[F, E, M], M <: PredictionModel[F, M]]
  extends ProjectedGaussianProcessHelper {  this: Predictor[F, E, M] with GaussianProcessParams =>

  protected val getKernel : () => Kernel = () => $(kernel)() + $(sigma2).const * new EyeKernel

  protected def getPoints(dataset: Dataset[_]) = {
    dataset.select(col($(labelCol)), col($(featuresCol))).rdd.map {
      case Row(label: Double, features: Vector) => LabeledPoint(label, features)
    }
  }

  // Round-robin the training points into numberOfExperts groups of roughly
  // $(datasetSizeForExpert) points each; every group trains one expert.
  protected def groupForExperts(points: RDD[LabeledPoint]) = {
    val numberOfExperts = Math.round(points.count().toDouble / $(datasetSizeForExpert))
    points.zipWithIndex.map { case(instance, index) =>
      (index % numberOfExperts, instance)
    }.groupByKey().map(_._2)
  }

  protected def getExpertLabelsAndKernels(points: RDD[LabeledPoint]): RDD[(BDV[Double], Kernel)] = {
    groupForExperts(points).map { chunk =>
      val (labels, trainingVectors) = chunk.map(lp => (lp.label, lp.features)).toArray.unzip
      (BDV(labels: _*), getKernel().setTrainingVectors(trainingVectors))
    }
  }

  protected def projectedProcess(expertLabelsAndKernels: RDD[(BDV[Double], Kernel)],
                                 points: RDD[LabeledPoint],
                                 optimalHyperparameters: BDV[Double]) = {
    val activeSet = $(activeSetProvider)($(activeSetSize), expertLabelsAndKernels, points,
      getKernel, optimalHyperparameters, $(seed))

    points.unpersist()

    val (matrixKmnKnm, vectorKmny) = getMatrixKmnKnmAndVectorKmny(expertLabelsAndKernels, activeSet)

    expertLabelsAndKernels.unpersist()

    val optimalKernel = getKernel().setHyperparameters(optimalHyperparameters).setTrainingVectors(activeSet)

    // inv(sigma^2 K_mm + K_mn * K_nm) * K_mn * y
    val (magicVector, magicMatrix) = getMagicVector(optimalKernel,
      matrixKmnKnm, vectorKmny, activeSet, optimalHyperparameters)

    new GaussianProjectedProcessRawPredictor(magicVector, magicMatrix, optimalKernel)
  }

  
  // Implemented by each concrete estimator to wrap the raw predictor
  // into its PredictionModel subclass M.
  protected def createModel(uid: String, rawPredictor: GaussianProjectedProcessRawPredictor): M
}

class GaussianProjectedProcessRawPredictor private[commons] (val magicVector: BDV[Double],
                                                             val magicMatrix: BDM[Double],
                                                             val kernel: Kernel) extends Serializable {
  def predict(features: Vector): (Double, Double) = {
    val cross = kernel.crossKernel(features)
    val selfKernel = kernel.selfKernel(features)
    (cross * magicVector, selfKernel + cross * magicMatrix * cross.t)
  }
} 
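The comment inside projectedProcess, inv(sigma^2 K_mm + K_mn * K_nm) * K_mn * y, is where the projected-process weights are spelled out. As an illustration, the same quantity written directly in Breeze; the names kmm, kmn, y and sigma2 are purely illustrative and do not appear in the source above.

import breeze.linalg.{inv, DenseMatrix => BDM, DenseVector => BDV}

// m = number of active-set (inducing) points, n = number of training points.
def projectedProcessWeights(kmm: BDM[Double],  // m x m kernel over the active set
                            kmn: BDM[Double],  // m x n cross-kernel: active set vs. training set
                            y: BDV[Double],    // n training labels
                            sigma2: Double): BDV[Double] =
  // inv(sigma^2 K_mm + K_mn * K_nm) * K_mn * y
  inv(kmm * sigma2 + kmn * kmn.t) * (kmn * y)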
Example 3
Source File: SparkModelConverter.scala (from TransmogrifAI, BSD 3-Clause "New" or "Revised" License)
package com.salesforce.op.stages.sparkwrappers.specific

import com.salesforce.op.features.types.{OPVector, Prediction, RealNN}
import com.salesforce.op.stages.base.binary.OpTransformer2
import com.salesforce.op.stages.impl.classification._
import com.salesforce.op.stages.impl.regression._
import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostRegressionModel}
import org.apache.spark.ml.classification._
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.regression._
import org.apache.spark.ml.{Model, PredictionModel}


object SparkModelConverter {

  // TODO remove when loco and model selector are updated
  def toOPUnchecked(
    model: Model[_],
    uid: String
  ): OpTransformer2[RealNN, OPVector, Prediction] = {
    model match {
      case m: LogisticRegressionModel => new OpLogisticRegressionModel(m, uid = uid)
      case m: RandomForestClassificationModel => new OpRandomForestClassificationModel(m, uid = uid)
      case m: NaiveBayesModel => new OpNaiveBayesModel(m, uid)
      case m: DecisionTreeClassificationModel => new OpDecisionTreeClassificationModel(m, uid = uid)
      case m: GBTClassificationModel => new OpGBTClassificationModel(m, uid = uid)
      case m: LinearSVCModel => new OpLinearSVCModel(m, uid = uid)
      case m: MultilayerPerceptronClassificationModel => new OpMultilayerPerceptronClassificationModel(m, uid = uid)
      case m: LinearRegressionModel => new OpLinearRegressionModel(m, uid = uid)
      case m: RandomForestRegressionModel => new OpRandomForestRegressionModel(m, uid = uid)
      case m: GBTRegressionModel => new OpGBTRegressionModel(m, uid = uid)
      case m: DecisionTreeRegressionModel => new OpDecisionTreeRegressionModel(m, uid = uid)
      case m: GeneralizedLinearRegressionModel => new OpGeneralizedLinearRegressionModel(m, uid = uid)
      case m: XGBoostClassificationModel => new OpXGBoostClassificationModel(m, uid = uid)
      case m: XGBoostRegressionModel => new OpXGBoostRegressionModel(m, uid = uid)
      case m => throw new RuntimeException(s"model conversion not implemented for model $m")
    }
  }

}
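The converter above has to dispatch on every concrete model class because each OP wrapper is a distinct type. For contrast, here is a minimal standalone sketch (not part of TransmogrifAI) of driving any Vector-based PredictionModel generically through the Transformer API they all share.

import org.apache.spark.ml.PredictionModel
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.sql.DataFrame

// Works for any fitted Vector-based PredictionModel (linear regression, random
// forest, GBT, ...): transform() appends the column named by getPredictionCol.
def predictionsOf[M <: PredictionModel[Vector, M]](model: M, df: DataFrame): DataFrame =
  model.transform(df).select(model.getFeaturesCol, model.getPredictionCol)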