org.apache.spark.ml.regression.LinearRegressionModel Scala Examples

The following examples show how to use org.apache.spark.ml.regression.LinearRegressionModel. Each example is taken from an open-source project; the project and source file are noted above the code.
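For context, a LinearRegressionModel is obtained by fitting a LinearRegression estimator. Here is a minimal sketch using the standard Spark ML API (the data path is a placeholder):

import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("lr-example").getOrCreate()

// Any DataFrame with "label" and "features" columns works; this path is a placeholder
val training = spark.read.format("libsvm").load("data/sample_linear_regression_data.txt")

val lr = new LinearRegression()
  .setMaxIter(10)
  .setRegParam(0.3)
  .setElasticNetParam(0.8)

// fit() returns a LinearRegressionModel
val model = lr.fit(training)
println(s"Coefficients: ${model.coefficients} Intercept: ${model.intercept}")

// The training summary exposes diagnostics such as RMSE and r2
println(s"RMSE: ${model.summary.rootMeanSquaredError} r2: ${model.summary.r2}")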
Example 1
Source File: LocalLinearRegressionModel.scala    From spark-ml-serving   with Apache License 2.0
package io.hydrosphere.spark_ml_serving.regression

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.regression.LinearRegressionModel

class LocalLinearRegressionModel(override val sparkTransformer: LinearRegressionModel)
  extends LocalPredictionModel[LinearRegressionModel] {}

object LocalLinearRegressionModel
  extends SimpleModelLoader[LinearRegressionModel]
  with TypedTransformerConverter[LinearRegressionModel] {

  override def build(metadata: Metadata, data: LocalData): LinearRegressionModel = {
    val intercept       = data.column("intercept").get.data.head.asInstanceOf[java.lang.Double]
    val coefficientsMap = data.column("coefficients").get.data.head.asInstanceOf[Map[String, Any]]
    val coefficients    = DataUtils.constructVector(coefficientsMap)

    // LinearRegressionModel's constructor is private[ml]; Scala compiles the
    // private[ml] qualifier to public bytecode, so reflection can reach it.
    val ctor = classOf[LinearRegressionModel].getConstructor(
      classOf[String],
      classOf[Vector],
      classOf[Double]
    )
    val inst = ctor.newInstance(metadata.uid, coefficients, intercept)
    inst
      .set(inst.featuresCol, metadata.paramMap("featuresCol").asInstanceOf[String])
      .set(inst.predictionCol, metadata.paramMap("predictionCol").asInstanceOf[String])
      .set(inst.labelCol, metadata.paramMap("labelCol").asInstanceOf[String])
      .set(inst.elasticNetParam, metadata.paramMap("elasticNetParam").toString.toDouble)
      .set(inst.maxIter, metadata.paramMap("maxIter").asInstanceOf[Number].intValue())
      .set(inst.regParam, metadata.paramMap("regParam").toString.toDouble)
      .set(inst.solver, metadata.paramMap("solver").asInstanceOf[String])
      .set(inst.tol, metadata.paramMap("tol").toString.toDouble)
      .set(inst.standardization, metadata.paramMap("standardization").asInstanceOf[Boolean])
      .set(inst.fitIntercept, metadata.paramMap("fitIntercept").asInstanceOf[Boolean])
  }

  override implicit def toLocal(
    transformer: LinearRegressionModel
  ) = new LocalLinearRegressionModel(transformer)
} 
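For comparison, Spark can round-trip a fitted LinearRegressionModel without reflection through its built-in MLWritable/MLReadable persistence. A minimal sketch, assuming model is a fitted LinearRegressionModel and the path is a placeholder:

// Save, then reload, using Spark's native persistence
model.write.overwrite().save("/tmp/lr-model")
val restored = LinearRegressionModel.load("/tmp/lr-model")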
Example 2
Source File: DecisionTreeRegressionOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.ops.regression

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.dsl._
import ml.combust.bundle.tree.decision.TreeSerializer
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.bundle.tree.decision.SparkNodeWrapper
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, LinearRegressionModel}


class DecisionTreeRegressionOp extends SimpleSparkOp[DecisionTreeRegressionModel] {
  implicit val nodeWrapper = SparkNodeWrapper

  override val Model: OpModel[SparkBundleContext, DecisionTreeRegressionModel] = new OpModel[SparkBundleContext, DecisionTreeRegressionModel] {
    override val klazz: Class[DecisionTreeRegressionModel] = classOf[DecisionTreeRegressionModel]

    override def opName: String = Bundle.BuiltinOps.regression.decision_tree_regression

    override def store(model: Model, obj: DecisionTreeRegressionModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      TreeSerializer[org.apache.spark.ml.tree.Node](context.file("tree"), withImpurities = false).write(obj.rootNode)
      model.withValue("num_features", Value.long(obj.numFeatures))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): DecisionTreeRegressionModel = {
      val rootNode = TreeSerializer[org.apache.spark.ml.tree.Node](context.file("tree"), withImpurities = false).read().get
      new DecisionTreeRegressionModel(uid = "",
        rootNode = rootNode,
        numFeatures = model.value("num_features").getLong.toInt)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: DecisionTreeRegressionModel): DecisionTreeRegressionModel = {
    new DecisionTreeRegressionModel(uid = uid,
      rootNode = model.rootNode,
      numFeatures = model.numFeatures)
  }

  override def sparkInputs(obj: DecisionTreeRegressionModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: DecisionTreeRegressionModel): Seq[SimpleParamSpec] = {
    Seq("prediction" -> obj.predictionCol)
  }
} 
Example 3
Source File: LinearRegressionOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.ops.regression

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.dsl._
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.regression.LinearRegressionModel


class LinearRegressionOp extends SimpleSparkOp[LinearRegressionModel] {
  override val Model: OpModel[SparkBundleContext, LinearRegressionModel] = new OpModel[SparkBundleContext, LinearRegressionModel] {
    override val klazz: Class[LinearRegressionModel] = classOf[LinearRegressionModel]

    override def opName: String = Bundle.BuiltinOps.regression.linear_regression

    override def store(model: Model, obj: LinearRegressionModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      model.withValue("coefficients", Value.vector(obj.coefficients.toArray)).
        withValue("intercept", Value.double(obj.intercept))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): LinearRegressionModel = {
      new LinearRegressionModel(uid = "",
        coefficients = Vectors.dense(model.value("coefficients").getTensor[Double].toArray),
        intercept = model.value("intercept").getDouble)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: LinearRegressionModel): LinearRegressionModel = {
    new LinearRegressionModel(uid = uid,
      coefficients = model.coefficients,
      intercept = model.intercept)
  }

  override def sparkInputs(obj: LinearRegressionModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: LinearRegressionModel): Seq[SimpleParamSpec] = {
    Seq("prediction" -> obj.predictionCol)
  }
} 
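Ops like this one are registered with the bundle registry and invoked indirectly when a fitted pipeline is exported. A sketch based on MLeap's documented export pattern (paths and variable names are placeholders, and details vary by MLeap version):

import ml.combust.bundle.BundleFile
import ml.combust.mleap.spark.SparkSupport._
import org.apache.spark.ml.bundle.SparkBundleContext
import resource._

// The context carries a transformed dataset so ops can record schema information
implicit val sbc = SparkBundleContext().withDataset(pipelineModel.transform(df))

for (bundle <- managed(BundleFile("jar:file:/tmp/lr-pipeline.zip"))) {
  pipelineModel.writeBundle.save(bundle).get
}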
Example 4
Source File: TypedLinearRegression.scala    From frameless   with Apache License 2.0
package frameless
package ml
package regression

import frameless.ml.internals.LinearInputsChecker
import frameless.ml.params.linears.{LossStrategy, Solver}
import frameless.ml.{AppendTransformer, TypedEstimator}
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}


final class TypedLinearRegression[Inputs] private[ml](
  lr: LinearRegression,
  labelCol: String,
  featuresCol: String,
  weightCol: Option[String]
) extends TypedEstimator[Inputs, TypedLinearRegression.Outputs, LinearRegressionModel] {

  val estimatorWithoutWeight: LinearRegression = lr
    .setLabelCol(labelCol)
    .setFeaturesCol(featuresCol)
    .setPredictionCol(AppendTransformer.tempColumnName)

  val estimator = weightCol.fold(estimatorWithoutWeight)(estimatorWithoutWeight.setWeightCol)

  def setRegParam(value: Double):           TypedLinearRegression[Inputs] = copy(lr.setRegParam(value))
  def setFitIntercept(value: Boolean):      TypedLinearRegression[Inputs] = copy(lr.setFitIntercept(value))
  def setStandardization(value: Boolean):   TypedLinearRegression[Inputs] = copy(lr.setStandardization(value))
  def setElasticNetParam(value: Double):    TypedLinearRegression[Inputs] = copy(lr.setElasticNetParam(value))
  def setMaxIter(value: Int):               TypedLinearRegression[Inputs] = copy(lr.setMaxIter(value))
  def setTol(value: Double):                TypedLinearRegression[Inputs] = copy(lr.setTol(value))
  def setSolver(value: Solver):             TypedLinearRegression[Inputs] = copy(lr.setSolver(value.sparkValue))
  def setAggregationDepth(value: Int):      TypedLinearRegression[Inputs] = copy(lr.setAggregationDepth(value))
  def setLoss(value: LossStrategy):         TypedLinearRegression[Inputs] = copy(lr.setLoss(value.sparkValue))
  def setEpsilon(value: Double):            TypedLinearRegression[Inputs] = copy(lr.setEpsilon(value))

  private def copy(newLr: LinearRegression): TypedLinearRegression[Inputs] =
    new TypedLinearRegression[Inputs](newLr, labelCol, featuresCol, weightCol)

}

object TypedLinearRegression {
  case class Outputs(prediction: Double)
  case class Weight(weight: Double)


  def apply[Inputs](implicit inputsChecker: LinearInputsChecker[Inputs]): TypedLinearRegression[Inputs] = {
    new TypedLinearRegression(new LinearRegression(), inputsChecker.labelCol, inputsChecker.featuresCol, inputsChecker.weightCol)
  }
} 
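A usage sketch for the estimator above, following the frameless pattern of a case class whose Double field is the label and whose Vector field is the features (the case class and dataset names here are assumptions, not from the source; an implicit SparkSession and frameless.syntax._ are assumed in scope):

import frameless.TypedDataset
import frameless.ml.regression.TypedLinearRegression
import org.apache.spark.ml.linalg.Vector

case class Inputs(price: Double, features: Vector)

// trainingData: TypedDataset[Inputs] is assumed to be in scope
val lr = TypedLinearRegression[Inputs]
  .setMaxIter(50)
  .setRegParam(0.1)

// fit(...) returns a suspended frameless Job; run() executes it on the session
val model = lr.fit(trainingData).run()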
Example 5
Source File: OpPredictorWrapperTest.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.sparkwrappers.specific

import com.salesforce.op.features.types._
import com.salesforce.op.stages.sparkwrappers.generic.SparkWrapperParams
import com.salesforce.op.test.{PrestigeData, TestFeatureBuilder, TestSparkContext}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner
import org.slf4j.LoggerFactory


@RunWith(classOf[JUnitRunner])
class OpPredictorWrapperTest extends FlatSpec with TestSparkContext with PrestigeData {

  val log = LoggerFactory.getLogger(this.getClass)

  val (ds, targetLabel, featureVector) = TestFeatureBuilder[RealNN, OPVector](
    prestigeSeq.map(p => p.prestige.toRealNN -> Vectors.dense(p.education, p.income, p.women).toOPVector)
  )

  Spec[OpPredictorWrapper[_, _]] should
    "be able to run a simple linear regression model (fitIntercept=true)" in {
    val lrModel: LinearRegressionModel = fitLinRegModel(fitIntercept = true)
    lrModel.intercept.abs should be > 1E-6
  }

  it should "be able to run a simple logistic regression model (fitIntercept=false)" in {
    val lrModel: LinearRegressionModel = fitLinRegModel(fitIntercept = false)
    lrModel.intercept.abs should be < Double.MinPositiveValue
  }

  private def fitLinRegModel(fitIntercept: Boolean): LinearRegressionModel = {
    val lrBase =
      new LinearRegression()
        .setMaxIter(10)
        .setRegParam(0.3)
        .setElasticNetParam(0.8)
        .setFitIntercept(fitIntercept)

    val lr = new OpPredictorWrapper[LinearRegression, LinearRegressionModel](lrBase)
      .setInput(targetLabel, featureVector)

    // Fit the model
    val model = lr.fit(ds).asInstanceOf[SparkWrapperParams[LinearRegressionModel]]
    val lrModel = model.getSparkMlStage().get

    // Print the coefficients and intercept for linear regression
    log.info(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    log.info(s"numIterations: ${trainingSummary.totalIterations}")
    log.info(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
    if (log.isInfoEnabled) trainingSummary.residuals.show()
    log.info(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    log.info(s"r2: ${trainingSummary.r2}")
    // checking r2 as a cheap way to make sure things are running as intended.
    assert(trainingSummary.r2 > 0.9)

    if (log.isInfoEnabled) {
      val output = lrModel.transform(ds)
      output.show(false)
    }

    lrModel
  }
} 
Example 6
Source File: OpLinearRegressionTest.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.regression

import com.salesforce.op.features.types._
import com.salesforce.op.stages.base.binary.{BinaryEstimator, BinaryModel}
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test._
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpLinearRegressionTest extends OpEstimatorSpec[Prediction, OpPredictorWrapperModel[LinearRegressionModel],
  OpPredictorWrapper[LinearRegression, LinearRegressionModel]] with PredictionEquality {

  override def specName: String = Spec[OpLinearRegression]

  val (inputData, rawLabel, features) = TestFeatureBuilder(
    Seq[(RealNN, OPVector)](
      (10.0.toRealNN, Vectors.dense(1.0, 4.3, 1.3).toOPVector),
      (20.0.toRealNN, Vectors.dense(2.0, 0.3, 0.1).toOPVector),
      (30.0.toRealNN, Vectors.dense(3.0, 3.9, 4.3).toOPVector),
      (40.0.toRealNN, Vectors.dense(4.0, 1.3, 0.9).toOPVector),
      (50.0.toRealNN, Vectors.dense(5.0, 4.7, 1.3).toOPVector)
    )
  )
  val label = rawLabel.copy(isResponse = true)
  val estimator = new OpLinearRegression().setInput(label, features)

  val expectedResult = Seq(
    Prediction(10.0),
    Prediction(20.0),
    Prediction(30.0),
    Prediction(40.0),
    Prediction(50.0)
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxIter(10)
      .setRegParam(0.1)
      .setFitIntercept(true)
      .setElasticNetParam(0.1)
      .setSolver("normal")
    estimator.fit(inputData)

    estimator.predictor.getMaxIter shouldBe 10
    estimator.predictor.getRegParam shouldBe 0.1
    estimator.predictor.getFitIntercept shouldBe true
    estimator.predictor.getElasticNetParam shouldBe 0.1
    estimator.predictor.getSolver shouldBe "normal"

  }
} 
Example 7
Source File: SparkRWrappers.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.ml.api.r

import org.apache.spark.ml.attribute._
import org.apache.spark.ml.feature.RFormula
import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.sql.DataFrame

private[r] object SparkRWrappers {
  def fitRModelFormula(
      value: String,
      df: DataFrame,
      family: String,
      lambda: Double,
      alpha: Double): PipelineModel = {
    val formula = new RFormula().setFormula(value)
    val estimator = family match {
      case "gaussian" => new LinearRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
      case "binomial" => new LogisticRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
    }
    val pipeline = new Pipeline().setStages(Array(formula, estimator))
    pipeline.fit(df)
  }

  def getModelWeights(model: PipelineModel): Array[Double] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        Array(m.intercept) ++ m.weights.toArray
      case _: LogisticRegressionModel =>
        throw new UnsupportedOperationException(
          "No weights available for LogisticRegressionModel")  // SPARK-9492
    }
  }

  def getModelFeatures(model: PipelineModel): Array[String] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        val attrs = AttributeGroup.fromStructField(
          m.summary.predictions.schema(m.summary.featuresCol))
        Array("(Intercept)") ++ attrs.attributes.get.map(_.name.get)
      case _: LogisticRegressionModel =>
        throw new UnsupportedOperationException(
          "No features names available for LogisticRegressionModel")  // SPARK-9492
    }
  }
} 
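Note the API drift across Spark versions: this Spark 1.5-era wrapper reads m.weights, which was deprecated in favor of coefficients in Spark 1.6 and removed in Spark 2.0. The Spark 1.6 variant of the same wrapper in Example 8 reads m.coefficients and additionally returns standard errors, t-values, and p-values from the model summary.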
Example 8
Source File: SparkRWrappers.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.ml.api.r

import org.apache.spark.ml.attribute._
import org.apache.spark.ml.feature.RFormula
import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.sql.DataFrame

private[r] object SparkRWrappers {
  def fitRModelFormula(
      value: String,
      df: DataFrame,
      family: String,
      lambda: Double,
      alpha: Double,
      standardize: Boolean,
      solver: String): PipelineModel = {
    val formula = new RFormula().setFormula(value)
    val estimator = family match {
      case "gaussian" => new LinearRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
        .setStandardization(standardize)
        .setSolver(solver)
      case "binomial" => new LogisticRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
        .setStandardization(standardize)
    }
    val pipeline = new Pipeline().setStages(Array(formula, estimator))
    pipeline.fit(df)
  }

  def getModelCoefficients(model: PipelineModel): Array[Double] = {
    model.stages.last match {
      case m: LinearRegressionModel => {
        val coefficientStandardErrorsR = Array(m.summary.coefficientStandardErrors.last) ++
          m.summary.coefficientStandardErrors.dropRight(1)
        val tValuesR = Array(m.summary.tValues.last) ++ m.summary.tValues.dropRight(1)
        val pValuesR = Array(m.summary.pValues.last) ++ m.summary.pValues.dropRight(1)
        if (m.getFitIntercept) {
          Array(m.intercept) ++ m.coefficients.toArray ++ coefficientStandardErrorsR ++
            tValuesR ++ pValuesR
        } else {
          m.coefficients.toArray ++ coefficientStandardErrorsR ++ tValuesR ++ pValuesR
        }
      }
      case m: LogisticRegressionModel => {
        if (m.getFitIntercept) {
          Array(m.intercept) ++ m.coefficients.toArray
        } else {
          m.coefficients.toArray
        }
      }
    }
  }

  def getModelDevianceResiduals(model: PipelineModel): Array[Double] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        m.summary.devianceResiduals
      case _: LogisticRegressionModel =>
        throw new UnsupportedOperationException(
          "No deviance residuals available for LogisticRegressionModel")
    }
  }

  def getModelFeatures(model: PipelineModel): Array[String] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        val attrs = AttributeGroup.fromStructField(
          m.summary.predictions.schema(m.summary.featuresCol))
        if (m.getFitIntercept) {
          Array("(Intercept)") ++ attrs.attributes.get.map(_.name.get)
        } else {
          attrs.attributes.get.map(_.name.get)
        }
      case m: LogisticRegressionModel =>
        val attrs = AttributeGroup.fromStructField(
          m.summary.predictions.schema(m.summary.featuresCol))
        if (m.getFitIntercept) {
          Array("(Intercept)") ++ attrs.attributes.get.map(_.name.get)
        } else {
          attrs.attributes.get.map(_.name.get)
        }
    }
  }

  def getModelName(model: PipelineModel): String = {
    model.stages.last match {
      case _: LinearRegressionModel =>
        "LinearRegressionModel"
      case _: LogisticRegressionModel =>
        "LogisticRegressionModel"
    }
  }
} 
Example 9
Source File: BaseTransformerConverter.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.mleap.converter.runtime

import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.ml.feature.{IndexToString, StandardScalerModel, StringIndexerModel, VectorAssembler}
import org.apache.spark.ml.mleap.classification.SVMModel
import org.apache.spark.ml.mleap.converter.runtime.classification.{RandomForestClassificationModelToMleap, SupportVectorMachineModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.feature.{IndexToStringToMleap, StandardScalerModelToMleap, StringIndexerModelToMleap, VectorAssemblerModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.regression.{LinearRegressionModelToMleap, RandomForestRegressionModelToMleap}
import org.apache.spark.ml.regression.{LinearRegressionModel, RandomForestRegressionModel}


trait BaseTransformerConverter extends SparkTransformerConverter {
  // regression
  implicit val mleapLinearRegressionModelToMleap: TransformerToMleap[LinearRegressionModel, transformer.LinearRegressionModel] =
    addConverter(LinearRegressionModelToMleap)
  implicit val mleapRandomForestRegressionModelToMleap: TransformerToMleap[RandomForestRegressionModel, transformer.RandomForestRegressionModel] =
    addConverter(RandomForestRegressionModelToMleap)

  // classification
  implicit val mleapRandomForestClassificationModelToMleap: TransformerToMleap[RandomForestClassificationModel, transformer.RandomForestClassificationModel] =
    addConverter(RandomForestClassificationModelToMleap)
  implicit val mleapSupportVectorMachineModelToMleap: TransformerToMleap[SVMModel, transformer.SupportVectorMachineModel] =
    addConverter(SupportVectorMachineModelToMleap)

  // feature
  implicit val mleapIndexToStringToMleap: TransformerToMleap[IndexToString, transformer.ReverseStringIndexerModel] =
    addConverter(IndexToStringToMleap)
  implicit val mleapStandardScalerModelToMleap: TransformerToMleap[StandardScalerModel, transformer.StandardScalerModel] =
    addConverter(StandardScalerModelToMleap)
  implicit val mleapStringIndexerModelToMleap: TransformerToMleap[StringIndexerModel, transformer.StringIndexerModel] =
    addConverter(StringIndexerModelToMleap)
  implicit val mleapVectorAssemblerToMleap: TransformerToMleap[VectorAssembler, transformer.VectorAssemblerModel] =
    addConverter(VectorAssemblerModelToMleap)

  // other
  implicit val mleapPipelineModelToMleap: TransformerToMleap[PipelineModel, transformer.PipelineModel] =
    addConverter(PipelineModelToMleap(this))
}
object BaseTransformerConverter extends BaseTransformerConverter