org.apache.spark.ml.classification.RandomForestClassificationModel Scala Examples

The following examples show how to use org.apache.spark.ml.classification.RandomForestClassificationModel. Each example is drawn from an open-source project; the source file, project, and license are noted above the code.
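Before the project examples, here is a minimal, self-contained sketch of the basic train/save/load cycle for this class. The object name, application name, and file paths are placeholders for illustration, not taken from any project below.

import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.sql.SparkSession

object RandomForestQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("rf-quickstart").master("local[*]").getOrCreate()

    // Load a LibSVM-formatted dataset (placeholder path)
    val data = spark.read.format("libsvm").load("data/sample_libsvm_data.txt")

    // Fit a small forest and persist it to disk
    val model: RandomForestClassificationModel =
      new RandomForestClassifier().setNumTrees(10).fit(data)
    model.write.overwrite().save("/tmp/rf-model")

    // Reload the persisted model and score the same data
    val loaded = RandomForestClassificationModel.load("/tmp/rf-model")
    loaded.transform(data).select("label", "prediction", "probability").show(5)

    spark.stop()
  }
}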
Example 1
Source File: RandomForestPrediction.scala    From piflow   with BSD 2-Clause "Simplified" License
package cn.piflow.bundle.ml_classification

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.sql.SparkSession

class RandomForestPrediction extends ConfigurableStop {
  val authorEmail: String = "[email protected]"
  val description: String = "use an existing RandomForest Model to predict"
  val inportList: List[String] = List(Port.DefaultPort)
  val outportList: List[String] = List(Port.DefaultPort)
  var test_data_path: String = _
  var model_path: String = _

  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val spark = pec.get[SparkSession]()

    // Load the test data, stored in LibSVM format, as a DataFrame
    val data = spark.read.format("libsvm").load(test_data_path)

    // Load the saved model and score the test data
    val model = RandomForestClassificationModel.load(model_path)

    val predictions = model.transform(data)
    predictions.show()
    out.write(predictions)
  }

  def initialize(ctx: ProcessContext): Unit = {}

  def setProperties(map: Map[String, Any]): Unit = {
    test_data_path = MapUtil.get(map, key = "test_data_path").asInstanceOf[String]
    model_path = MapUtil.get(map, key = "model_path").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val test_data_path = new PropertyDescriptor().name("test_data_path").displayName("TEST_DATA_PATH").defaultValue("").required(true)
    val model_path = new PropertyDescriptor().name("model_path").displayName("MODEL_PATH").defaultValue("").required(true)
    descriptor = test_data_path :: descriptor
    descriptor = model_path :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/ml_classification/RandomForestPrediction.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MLGroup.toString)
  }

} 
Example 2
Source File: RandomForestClassifierOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import ml.combust.bundle.dsl._
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.bundle.tree.decision.SparkNodeWrapper
import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, RandomForestClassificationModel}


class RandomForestClassifierOp extends SimpleSparkOp[RandomForestClassificationModel] {
  implicit val nodeWrapper = SparkNodeWrapper

  override val Model: OpModel[SparkBundleContext, RandomForestClassificationModel] = new OpModel[SparkBundleContext, RandomForestClassificationModel] {
    override val klazz: Class[RandomForestClassificationModel] = classOf[RandomForestClassificationModel]

    override def opName: String = Bundle.BuiltinOps.classification.random_forest_classifier

    override def store(model: Model, obj: RandomForestClassificationModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
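      // Serialize each tree in the ensemble under its own bundle name ("tree0", "tree1", ...)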
      val trees = obj.trees.zipWithIndex.map {
        case (tree, i) =>
          val name = s"tree$i"
          ModelSerializer(context.bundleContext(name)).write(tree).get
          name
      }
      val thresholds = if (obj.isSet(obj.thresholds)) {
        Some(obj.getThresholds)
      } else None
      model.withValue("num_features", Value.long(obj.numFeatures)).
        withValue("num_classes", Value.long(obj.numClasses)).
        withValue("tree_weights", Value.doubleList(obj.treeWeights)).
        withValue("trees", Value.stringList(trees)).
        withValue("thresholds", thresholds.map(_.toSeq).map(Value.doubleList))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): RandomForestClassificationModel = {
      val numFeatures = model.value("num_features").getLong.toInt
      val numClasses = model.value("num_classes").getLong.toInt
      val treeWeights = model.value("tree_weights").getDoubleList

      // TODO: get rid of this when Spark supports setting tree weights
      for (weight <- treeWeights) { require(weight == 1.0, "tree weights must be 1.0 for Spark") }

      val models = model.value("trees").getStringList.map {
        tree => ModelSerializer(context.bundleContext(tree)).read().get.asInstanceOf[DecisionTreeClassificationModel]
      }.toArray

      val m = new RandomForestClassificationModel(uid = "",
        numFeatures = numFeatures,
        numClasses = numClasses,
        _trees = models)
      model.getValue("thresholds").
        map(t => m.setThresholds(t.getDoubleList.toArray)).
        getOrElse(m)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: RandomForestClassificationModel): RandomForestClassificationModel = {
    val r = new RandomForestClassificationModel(uid = uid,
      _trees = model.trees,
      numFeatures = model.numFeatures,
      numClasses = model.numClasses)
    if (model.isDefined(model.thresholds)) { r.setThresholds(model.getThresholds) }
    r
  }

  override def sparkInputs(obj: RandomForestClassificationModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: RandomForestClassificationModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol,
      "probability" -> obj.probabilityCol,
      "prediction" -> obj.predictionCol)
  }
} 
Example 3
Source File: TypedRandomForestClassifier.scala    From frameless   with Apache License 2.0
package frameless
package ml
package classification

import frameless.ml.internals.TreesInputsChecker
import frameless.ml.params.trees.FeatureSubsetStrategy
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.linalg.Vector


final class TypedRandomForestClassifier[Inputs] private[ml](
  rf: RandomForestClassifier,
  labelCol: String,
  featuresCol: String
) extends TypedEstimator[Inputs, TypedRandomForestClassifier.Outputs, RandomForestClassificationModel] {

  val estimator: RandomForestClassifier =
    rf
      .setLabelCol(labelCol)
      .setFeaturesCol(featuresCol)
      .setPredictionCol(AppendTransformer.tempColumnName)
      .setRawPredictionCol(AppendTransformer.tempColumnName2)
      .setProbabilityCol(AppendTransformer.tempColumnName3)

  def setNumTrees(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setNumTrees(value))
  def setMaxDepth(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMaxDepth(value))
  def setMinInfoGain(value: Double): TypedRandomForestClassifier[Inputs] = copy(rf.setMinInfoGain(value))
  def setMinInstancesPerNode(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMinInstancesPerNode(value))
  def setMaxMemoryInMB(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMaxMemoryInMB(value))
  def setSubsamplingRate(value: Double): TypedRandomForestClassifier[Inputs] = copy(rf.setSubsamplingRate(value))
  def setFeatureSubsetStrategy(value: FeatureSubsetStrategy): TypedRandomForestClassifier[Inputs] =
    copy(rf.setFeatureSubsetStrategy(value.sparkValue))
  def setMaxBins(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMaxBins(value))

  private def copy(newRf: RandomForestClassifier): TypedRandomForestClassifier[Inputs] =
    new TypedRandomForestClassifier[Inputs](newRf, labelCol, featuresCol)
}

object TypedRandomForestClassifier {
  case class Outputs(rawPrediction: Vector, probability: Vector, prediction: Double)

  def apply[Inputs](implicit inputsChecker: TreesInputsChecker[Inputs]): TypedRandomForestClassifier[Inputs] = {
    new TypedRandomForestClassifier(new RandomForestClassifier(), inputsChecker.labelCol, inputsChecker.featuresCol)
  }
} 
Example 4
Source File: OpRandomForestClassifierTest.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder}
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpRandomForestClassifierTest extends
  OpEstimatorSpec[Prediction, OpPredictorWrapperModel[RandomForestClassificationModel],
    OpPredictorWrapper[RandomForestClassifier, RandomForestClassificationModel]] with PredictionEquality {

  override def specName: String = Spec[OpRandomForestClassifier]

  lazy val (inputData, rawLabelMulti, featuresMulti) =
    TestFeatureBuilder[RealNN, OPVector]("labelMulti", "featuresMulti",
      Seq(
        (1.0.toRealNN, Vectors.dense(12.0, 4.3, 1.3).toOPVector),
        (0.0.toRealNN, Vectors.dense(0.0, 0.3, 0.1).toOPVector),
        (2.0.toRealNN, Vectors.dense(1.0, 3.9, 4.3).toOPVector),
        (2.0.toRealNN, Vectors.dense(10.0, 1.3, 0.9).toOPVector),
        (1.0.toRealNN, Vectors.dense(15.0, 4.7, 1.3).toOPVector),
        (0.0.toRealNN, Vectors.dense(0.5, 0.9, 10.1).toOPVector),
        (1.0.toRealNN, Vectors.dense(11.5, 2.3, 1.3).toOPVector),
        (0.0.toRealNN, Vectors.dense(0.1, 3.3, 0.1).toOPVector),
        (2.0.toRealNN, Vectors.dense(1.0, 4.0, 4.5).toOPVector),
        (2.0.toRealNN, Vectors.dense(10.0, 1.5, 1.0).toOPVector)
      )
    )

  val labelMulti = rawLabelMulti.copy(isResponse = true)

  val estimator = new OpRandomForestClassifier().setInput(labelMulti, featuresMulti)

  val expectedResult = Seq(
    Prediction(1.0, Array(0.0, 17.0, 3.0), Array(0.0, 0.85, 0.15)),
    Prediction(0.0, Array(19.0, 0.0, 1.0), Array(0.95, 0.0, 0.05)),
    Prediction(2.0, Array(0.0, 1.0, 19.0), Array(0.0, 0.05, 0.95)),
    Prediction(2.0, Array(1.0, 2.0, 17.0), Array(0.05, 0.1, 0.85)),
    Prediction(1.0, Array(0.0, 17.0, 3.0), Array(0.0, 0.85, 0.15)),
    Prediction(0.0, Array(16.0, 0.0, 4.0), Array(0.8, 0.0, 0.2)),
    Prediction(1.0, Array(1.0, 17.0, 2.0), Array(0.05, 0.85, 0.1)),
    Prediction(0.0, Array(17.0, 0.0, 3.0), Array(0.85, 0.0, 0.15)),
    Prediction(2.0, Array(2.0, 1.0, 17.0), Array(0.1, 0.05, 0.85)),
    Prediction(2.0, Array(1.0, 2.0, 17.0), Array(0.05, 0.1, 0.85))
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxDepth(10)
      .setImpurity(Impurity.Gini.sparkName)
      .setMaxBins(33)
      .setMinInstancesPerNode(2)
      .setMinInfoGain(0.2)
      .setSubsamplingRate(0.9)
      .setNumTrees(21)
      .setSeed(2L)
    estimator.fit(inputData)

    estimator.predictor.getMaxDepth shouldBe 10
    estimator.predictor.getMaxBins shouldBe 33
    estimator.predictor.getImpurity shouldBe Impurity.Gini.sparkName
    estimator.predictor.getMinInstancesPerNode shouldBe 2
    estimator.predictor.getMinInfoGain shouldBe 0.2
    estimator.predictor.getSubsamplingRate shouldBe 0.9
    estimator.predictor.getNumTrees shouldBe 21
    estimator.predictor.getSeed shouldBe 2L
  }

} 
Example 5
Source File: RandomForestClassificationModelToMleap.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.mleap.converter.runtime.classification

import com.truecar.mleap.core.classification.RandomForestClassification
import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, RandomForestClassificationModel}
import org.apache.spark.ml.mleap.converter.runtime.TransformerToMleap


object RandomForestClassificationModelToMleap extends TransformerToMleap[RandomForestClassificationModel, transformer.RandomForestClassificationModel] {
  override def toMleap(t: RandomForestClassificationModel): transformer.RandomForestClassificationModel = {
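    // trees is exposed as Array[DecisionTreeModel] in this Spark version, so the cast
    // recovers the concrete classification models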
    val trees = t.trees.asInstanceOf[Array[DecisionTreeClassificationModel]]
      .map(tree => DecisionTreeClassificationModelToMleap(tree).toMleap)
    val model = RandomForestClassification(trees,
      t.numFeatures,
      t.numClasses)

    transformer.RandomForestClassificationModel(t.getFeaturesCol,
      t.getPredictionCol,
      model)
  }
} 
Example 6
Source File: BaseTransformerConverter.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.mleap.converter.runtime

import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.ml.feature.{IndexToString, StandardScalerModel, StringIndexerModel, VectorAssembler}
import org.apache.spark.ml.mleap.classification.SVMModel
import org.apache.spark.ml.mleap.converter.runtime.classification.{RandomForestClassificationModelToMleap, SupportVectorMachineModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.feature.{IndexToStringToMleap, StandardScalerModelToMleap, StringIndexerModelToMleap, VectorAssemblerModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.regression.{LinearRegressionModelToMleap, RandomForestRegressionModelToMleap}
import org.apache.spark.ml.regression.{LinearRegressionModel, RandomForestRegressionModel}


trait BaseTransformerConverter extends SparkTransformerConverter {
  // regression
  implicit val mleapLinearRegressionModelToMleap: TransformerToMleap[LinearRegressionModel, transformer.LinearRegressionModel] =
    addConverter(LinearRegressionModelToMleap)
  implicit val mleapRandomForestRegressionModelToMleap: TransformerToMleap[RandomForestRegressionModel, transformer.RandomForestRegressionModel] =
    addConverter(RandomForestRegressionModelToMleap)

  // classification
  implicit val mleapRandomForestClassificationModelToMleap: TransformerToMleap[RandomForestClassificationModel, transformer.RandomForestClassificationModel] =
    addConverter(RandomForestClassificationModelToMleap)
  implicit val mleapSupportVectorMachineModelToMleap: TransformerToMleap[SVMModel, transformer.SupportVectorMachineModel] =
    addConverter(SupportVectorMachineModelToMleap)

  // feature
  implicit val mleapIndexToStringToMleap: TransformerToMleap[IndexToString, transformer.ReverseStringIndexerModel] =
    addConverter(IndexToStringToMleap)
  implicit val mleapStandardScalerModelToMleap: TransformerToMleap[StandardScalerModel, transformer.StandardScalerModel] =
    addConverter(StandardScalerModelToMleap)
  implicit val mleapStringIndexerModelToMleap: TransformerToMleap[StringIndexerModel, transformer.StringIndexerModel] =
    addConverter(StringIndexerModelToMleap)
  implicit val mleapVectorAssemblerToMleap: TransformerToMleap[VectorAssembler, transformer.VectorAssemblerModel] =
    addConverter(VectorAssemblerModelToMleap)

  // other
  implicit val mleapPipelineModelToMleap: TransformerToMleap[PipelineModel, transformer.PipelineModel] =
    addConverter(PipelineModelToMleap(this))
}
object BaseTransformerConverter extends BaseTransformerConverter