org.apache.spark.ml.regression.DecisionTreeRegressionModel Java Examples

The following examples show how to use org.apache.spark.ml.regression.DecisionTreeRegressionModel. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DecisionTreeRegressionModelInfoAdapter.java    From spark-transformers with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link DecisionTreeModelInfo} from a Spark decision tree regression model.
 *
 * <p>The model's root node is passed through {@code DecisionNodeAdapterUtils.adaptNode}
 * and stored as the root of the result. The input keys are the model's features column
 * followed by its label column; the only output key is the prediction column.
 *
 * @param decisionTreeModel the Spark model to read the tree and column names from
 * @return a newly created, populated {@code DecisionTreeModelInfo}
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeRegressionModel decisionTreeModel) {
    final DecisionTreeModelInfo info = new DecisionTreeModelInfo();

    // Adapt the tree starting from the model's root node.
    info.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));

    // LinkedHashSet keeps insertion order: features column first, then label column.
    final Set<String> inputs = new LinkedHashSet<>();
    inputs.add(decisionTreeModel.getFeaturesCol());
    inputs.add(decisionTreeModel.getLabelCol());
    info.setInputKeys(inputs);

    final Set<String> outputs = new LinkedHashSet<>();
    outputs.add(decisionTreeModel.getPredictionCol());
    info.setOutputKeys(outputs);

    return info;
}
 
Example #2
Source File: DecisionTreeRegressionModelInfoAdapter.java    From spark-transformers with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link DecisionTreeModelInfo} from a Spark decision tree regression model.
 *
 * <p>The model's root node is passed through {@code DecisionNodeAdapterUtils.adaptNode}
 * and stored as the root of the result. The input keys are the model's features column
 * followed by its label column; the only output key is the prediction column.
 *
 * @param decisionTreeModel the Spark model to read the tree and column names from
 * @param df                accepted as part of the signature; not read by this method
 * @return a newly created, populated {@code DecisionTreeModelInfo}
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeRegressionModel decisionTreeModel, final DataFrame df) {
    final DecisionTreeModelInfo info = new DecisionTreeModelInfo();

    // Adapt the tree starting from the model's root node.
    info.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));

    // LinkedHashSet keeps insertion order: features column first, then label column.
    final Set<String> inputs = new LinkedHashSet<>();
    inputs.add(decisionTreeModel.getFeaturesCol());
    inputs.add(decisionTreeModel.getLabelCol());
    info.setInputKeys(inputs);

    final Set<String> outputs = new LinkedHashSet<>();
    outputs.add(decisionTreeModel.getPredictionCol());
    info.setOutputKeys(outputs);

    return info;
}
 
Example #3
Source File: GradientBoostClassificationModelInfoAdapter.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link GradientBoostModelInfo} from a Spark gradient-boosted tree
 * classification model.
 *
 * <p>Copies the feature count and per-tree weights, converts every member tree via
 * {@code DECISION_TREE_ADAPTER}, and records the features/label columns as input keys
 * and the prediction column as the output key. The regression flag is set to
 * {@code false}.
 *
 * @param sparkGbModel the Spark model to read from
 * @return a newly created, populated {@code GradientBoostModelInfo}
 */
@Override
GradientBoostModelInfo getModelInfo(final GBTClassificationModel sparkGbModel) {
    final GradientBoostModelInfo info = new GradientBoostModelInfo();

    info.setNumFeatures(sparkGbModel.numFeatures());
    info.setRegression(false); //false for classification

    // Box the primitive weights into a List<Double>, preserving their order.
    final double[] rawWeights = sparkGbModel.treeWeights();
    final List<Double> weights = new ArrayList<>();
    for (int i = 0; i < rawWeights.length; i++) {
        weights.add(rawWeights[i]);
    }
    info.setTreeWeights(weights);

    // Each member tree is cast to DecisionTreeRegressionModel before being adapted.
    final List<DecisionTreeModelInfo> adaptedTrees = new ArrayList<>();
    for (DecisionTreeModel tree : sparkGbModel.trees()) {
        final DecisionTreeRegressionModel regressionTree = (DecisionTreeRegressionModel) tree;
        adaptedTrees.add(DECISION_TREE_ADAPTER.getModelInfo(regressionTree));
    }
    info.setTrees(adaptedTrees);

    // LinkedHashSet keeps insertion order: features column first, then label column.
    final Set<String> inputs = new LinkedHashSet<>();
    inputs.add(sparkGbModel.getFeaturesCol());
    inputs.add(sparkGbModel.getLabelCol());
    info.setInputKeys(inputs);

    final Set<String> outputs = new LinkedHashSet<>();
    outputs.add(sparkGbModel.getPredictionCol());
    info.setOutputKeys(outputs);

    return info;
}
 
Example #4
Source File: DecisionTreeRegressionModelBridgeTest.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Trains a decision tree regressor on the LIBSVM fixture, exports and re-imports it
 * as a {@code DecisionTreeTransformer}, and asserts that the transformer's prediction
 * matches Spark's prediction (within {@code EPSILON}) for every held-out row.
 */
@Test
public void testDecisionTreeRegressionPrediction() {
    // Read the LIBSVM-formatted regression fixture into a DataFrame.
    final String fixturePath = "src/test/resources/regression_test.libsvm";
    final Dataset<Row> dataset = spark.read().format("libsvm").load(fixturePath);

    // 70% of the rows are used for training, the remaining 30% for testing.
    final Dataset<Row>[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    final Dataset<Row> trainingSet = parts[0];
    final Dataset<Row> testSet = parts[1];

    // Fit the Spark model with default regressor settings.
    final DecisionTreeRegressionModel regressionModel = new DecisionTreeRegressor().fit(trainingSet);
    trainingSet.printSchema();

    // Spark's own predictions for the held-out rows: column 0 = features, column 1 = prediction.
    final List<Row> sparkRows = regressionModel
            .transform(testSet)
            .select("features", "prediction")
            .collectAsList();

    // Round-trip the model through the exporter/importer.
    final byte[] serializedModel = ModelExporter.export(regressionModel);
    final DecisionTreeTransformer transformer =
            (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(serializedModel);

    System.out.println(transformer);

    // Compare the imported transformer's prediction with Spark's, row by row.
    for (final Row sparkRow : sparkRows) {
        final Map<String, Object> record = new HashMap<>();
        record.put("features", ((SparseVector) sparkRow.get(0)).toArray());
        transformer.transform(record);
        System.out.println(record);
        System.out.println(record.get("prediction"));
        assertEquals((double) record.get("prediction"), (double) sparkRow.get(1), EPSILON);
    }
}
 
Example #5
Source File: GradientBoostClassificationModelInfoAdapter.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link GradientBoostModelInfo} from a Spark gradient-boosted tree
 * classification model.
 *
 * <p>Copies the feature count and per-tree weights, converts every member tree via
 * {@code DECISION_TREE_ADAPTER} (forwarding {@code df}), and records the
 * features/label columns as input keys and the prediction column as the output key.
 * The regression flag is set to {@code false}.
 *
 * @param sparkGbModel the Spark model to read from
 * @param df           passed through unchanged to the per-tree adapter
 * @return a newly created, populated {@code GradientBoostModelInfo}
 */
@Override
GradientBoostModelInfo getModelInfo(final GBTClassificationModel sparkGbModel, final DataFrame df) {
    final GradientBoostModelInfo info = new GradientBoostModelInfo();

    info.setNumFeatures(sparkGbModel.numFeatures());
    info.setRegression(false); //false for classification

    // Box the primitive weights into a List<Double>, preserving their order.
    final double[] rawWeights = sparkGbModel.treeWeights();
    final List<Double> weights = new ArrayList<>();
    for (int i = 0; i < rawWeights.length; i++) {
        weights.add(rawWeights[i]);
    }
    info.setTreeWeights(weights);

    // Each member tree is cast to DecisionTreeRegressionModel before being adapted.
    final List<DecisionTreeModelInfo> adaptedTrees = new ArrayList<>();
    for (DecisionTreeModel tree : sparkGbModel.trees()) {
        final DecisionTreeRegressionModel regressionTree = (DecisionTreeRegressionModel) tree;
        adaptedTrees.add(DECISION_TREE_ADAPTER.getModelInfo(regressionTree, df));
    }
    info.setTrees(adaptedTrees);

    // LinkedHashSet keeps insertion order: features column first, then label column.
    final Set<String> inputs = new LinkedHashSet<>();
    inputs.add(sparkGbModel.getFeaturesCol());
    inputs.add(sparkGbModel.getLabelCol());
    info.setInputKeys(inputs);

    final Set<String> outputs = new LinkedHashSet<>();
    outputs.add(sparkGbModel.getPredictionCol());
    info.setOutputKeys(outputs);

    return info;
}
 
Example #6
Source File: RandomForestRegressionModelInfoAdapter.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link RandomForestModelInfo} from a Spark random forest regression model.
 *
 * <p>Copies the feature count and per-tree weights, converts every member tree via
 * {@code DECISION_TREE_ADAPTER} (forwarding {@code df}), and records the
 * features/label columns as input keys and the prediction column as the output key.
 * The regression flag is set to {@code true}.
 *
 * @param sparkRfModel the Spark model to read from
 * @param df           passed through unchanged to the per-tree adapter
 * @return a newly created, populated {@code RandomForestModelInfo}
 */
@Override
RandomForestModelInfo getModelInfo(final RandomForestRegressionModel sparkRfModel, final DataFrame df) {
    final RandomForestModelInfo info = new RandomForestModelInfo();

    info.setNumFeatures(sparkRfModel.numFeatures());
    info.setRegression(true); //true for regression

    // Box the primitive weights into a List<Double>, preserving their order.
    final double[] rawWeights = sparkRfModel.treeWeights();
    final List<Double> weights = new ArrayList<>();
    for (int i = 0; i < rawWeights.length; i++) {
        weights.add(rawWeights[i]);
    }
    info.setTreeWeights(weights);

    // Each member tree is cast to DecisionTreeRegressionModel before being adapted.
    final List<DecisionTreeModelInfo> adaptedTrees = new ArrayList<>();
    for (DecisionTreeModel tree : sparkRfModel.trees()) {
        final DecisionTreeRegressionModel regressionTree = (DecisionTreeRegressionModel) tree;
        adaptedTrees.add(DECISION_TREE_ADAPTER.getModelInfo(regressionTree, df));
    }
    info.setTrees(adaptedTrees);

    // LinkedHashSet keeps insertion order: features column first, then label column.
    final Set<String> inputs = new LinkedHashSet<>();
    inputs.add(sparkRfModel.getFeaturesCol());
    inputs.add(sparkRfModel.getLabelCol());
    info.setInputKeys(inputs);

    final Set<String> outputs = new LinkedHashSet<>();
    outputs.add(sparkRfModel.getPredictionCol());
    info.setOutputKeys(outputs);
    return info;
}
 
Example #7
Source File: DecisionTreeRegressionModelBridgeTest.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Trains a decision tree regressor on the LIBSVM fixture, exports and re-imports it
 * as a {@code Transformer}, and asserts that the transformer's prediction matches
 * Spark's prediction (within {@code EPSILON}) for every held-out row.
 */
@Test
public void testDecisionTreeRegression() {
    // Read the LIBSVM-formatted regression fixture into a DataFrame.
    final DataFrame dataset = sqlContext.read()
            .format("libsvm")
            .load("src/test/resources/regression_test.libsvm");

    // 70% of the rows are used for training, the remaining 30% for testing.
    final DataFrame[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    final DataFrame trainingSet = parts[0];
    final DataFrame testSet = parts[1];

    // Fit the Spark model, reading features from the "features" column.
    final DecisionTreeRegressionModel regressionModel = new DecisionTreeRegressor()
            .setFeaturesCol("features")
            .fit(trainingSet);

    // Round-trip the model through the exporter/importer.
    final byte[] serializedModel = ModelExporter.export(regressionModel, null);
    final Transformer transformer = ModelImporter.importAndGetTransformer(serializedModel);

    // Spark's own predictions for the held-out rows: column 0 = features, column 1 = prediction.
    final Row[] sparkRows = regressionModel
            .transform(testSet)
            .select("features", "prediction")
            .collect();

    // Compare the imported transformer's prediction with Spark's, row by row.
    for (final Row sparkRow : sparkRows) {
        final Vector features = (Vector) sparkRow.get(0);
        final double sparkPrediction = sparkRow.getDouble(1);

        // The transformer reads from its first input key and writes to its first output key.
        final Map<String, Object> record = new HashMap<String, Object>();
        record.put(transformer.getInputKeys().iterator().next(), features.toArray());
        transformer.transform(record);
        final double bridgePrediction =
                (double) record.get(transformer.getOutputKeys().iterator().next());

        System.out.println(sparkPrediction + ", " + bridgePrediction);
        assertEquals(sparkPrediction, bridgePrediction, EPSILON);
    }
}
 
Example #8
Source File: JavaDecisionTreeRegressionExample.java    From SparkDemo with MIT License 4 votes vote down vote up
/**
 * Runs the decision tree regression example end to end: loads the sample LIBSVM
 * data, fits a feature indexer and a decision tree regressor in a pipeline on a
 * 70% training split, prints a few prediction rows, the RMSE on the 30% test split,
 * and the debug string of the learned tree, then stops the Spark session.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  SparkSession spark = SparkSession.builder()
    .appName("JavaDecisionTreeRegressionExample")
    .getOrCreate();
  // $example on$
  // Read the LIBSVM-formatted sample data into a DataFrame.
  Dataset<Row> dataset = spark.read()
    .format("libsvm")
    .load("data/mllib/sample_libsvm_data.txt");

  // Index categorical features automatically.
  // With maxCategories = 4, features with > 4 distinct values are treated as continuous.
  VectorIndexer indexer = new VectorIndexer()
    .setInputCol("features")
    .setOutputCol("indexedFeatures")
    .setMaxCategories(4);
  VectorIndexerModel featureIndexer = indexer.fit(dataset);

  // Hold out 30% of the rows for testing; train on the other 70%.
  Dataset<Row>[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
  Dataset<Row> trainingSet = parts[0];
  Dataset<Row> testSet = parts[1];

  // The regressor consumes the indexed feature column produced by the indexer.
  DecisionTreeRegressor regressor = new DecisionTreeRegressor()
    .setFeaturesCol("indexedFeatures");

  // Pipeline: stage 0 is the indexer, stage 1 is the tree.
  PipelineStage[] stages = {featureIndexer, regressor};
  Pipeline pipeline = new Pipeline().setStages(stages);

  // Fitting the pipeline also runs the indexer.
  PipelineModel fittedPipeline = pipeline.fit(trainingSet);

  // Score the held-out rows.
  Dataset<Row> predictions = fittedPipeline.transform(testSet);

  // Show the first five rows of the selected columns.
  predictions.select("label", "features").show(5);

  // Compute the root mean squared error between the label and prediction columns.
  RegressionEvaluator evaluator = new RegressionEvaluator()
    .setLabelCol("label")
    .setPredictionCol("prediction")
    .setMetricName("rmse");
  double rmse = evaluator.evaluate(predictions);
  System.out.println("Root Mean Squared Error (RMSE) on test data = " + rmse);

  // Stage 1 of the fitted pipeline is the trained tree model.
  DecisionTreeRegressionModel treeModel =
    (DecisionTreeRegressionModel) fittedPipeline.stages()[1];
  System.out.println("Learned regression tree model:\n" + treeModel.toDebugString());
  // $example off$

  spark.stop();
}
 
Example #9
Source File: DecisionTreeRegressionModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates a converter for the given decision tree regression model.
 *
 * <p>The model is forwarded unchanged to the superclass constructor; this
 * constructor does no other work.
 *
 * @param model the Spark ML model handed to the superclass
 */
public DecisionTreeRegressionModelConverter(DecisionTreeRegressionModel model){
	super(model);
}
 
Example #10
Source File: DecisionTreeRegressionModelInfoAdapter.java    From spark-transformers with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the class literal for {@link DecisionTreeRegressionModel}.
 *
 * @return {@code DecisionTreeRegressionModel.class}
 */
@Override
public Class<DecisionTreeRegressionModel> getSource() {
    return DecisionTreeRegressionModel.class;
}
 
Example #11
Source File: DecisionTreeRegressionModelInfoAdapter.java    From spark-transformers with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the class literal for {@link DecisionTreeRegressionModel}.
 *
 * @return {@code DecisionTreeRegressionModel.class}
 */
@Override
public Class<DecisionTreeRegressionModel> getSource() {
    return DecisionTreeRegressionModel.class;
}