org.apache.spark.ml.classification.DecisionTreeClassificationModel Java Examples

The following examples show how to use org.apache.spark.ml.classification.DecisionTreeClassificationModel. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

6 votes

/**
 * Builds a {@link DecisionTreeModelInfo} describing the given Spark decision tree classifier.
 *
 * <p>The adapted root node, the input column names (features, then label) and the output
 * column names (prediction, probability, raw prediction) are copied onto the result.
 *
 * @param decisionTreeModel the fitted Spark model to describe
 * @return a newly created info object populated from {@code decisionTreeModel}
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeClassificationModel decisionTreeModel) {
    final DecisionTreeModelInfo info = new DecisionTreeModelInfo();
    info.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));

    // Input columns, kept in insertion order: features first, label second.
    final Set<String> consumedColumns = new LinkedHashSet<String>();
    consumedColumns.add(decisionTreeModel.getFeaturesCol());
    consumedColumns.add(decisionTreeModel.getLabelCol());
    info.setInputKeys(consumedColumns);

    // Each output column name is read once and reused for both the key set and the
    // dedicated probability / raw-prediction keys.
    final String predictionColumn = decisionTreeModel.getPredictionCol();
    final String probabilityColumn = decisionTreeModel.getProbabilityCol();
    final String rawPredictionColumn = decisionTreeModel.getRawPredictionCol();

    final Set<String> producedColumns = new LinkedHashSet<String>();
    producedColumns.add(predictionColumn);
    producedColumns.add(probabilityColumn);
    producedColumns.add(rawPredictionColumn);

    info.setProbabilityKey(probabilityColumn);
    info.setRawPredictionKey(rawPredictionColumn);
    info.setOutputKeys(producedColumns);

    return info;
}

Example #2

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

6 votes

/**
 * Builds a {@link DecisionTreeModelInfo} describing the given Spark decision tree classifier.
 *
 * <p>The adapted root node, the input column names (features, then label) and the output
 * column names (prediction, probability, raw prediction) are copied onto the result.
 *
 * @param decisionTreeModel the fitted Spark model to describe
 * @param df accepted as part of the signature; this method does not read it
 * @return a newly created info object populated from {@code decisionTreeModel}
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeClassificationModel decisionTreeModel,final DataFrame df) {
    final DecisionTreeModelInfo description = new DecisionTreeModelInfo();
    description.setRoot(DecisionNodeAdapterUtils.adaptNode(decisionTreeModel.rootNode()));

    // Input columns in insertion order: features, then label.
    final Set<String> sourceColumns = new LinkedHashSet<String>();
    sourceColumns.add(decisionTreeModel.getFeaturesCol());
    sourceColumns.add(decisionTreeModel.getLabelCol());
    description.setInputKeys(sourceColumns);

    // Output column names are looked up once and shared between the key set and the
    // individual probability / raw-prediction setters.
    final String predictionName = decisionTreeModel.getPredictionCol();
    final String probabilityName = decisionTreeModel.getProbabilityCol();
    final String rawPredictionName = decisionTreeModel.getRawPredictionCol();

    final Set<String> resultColumns = new LinkedHashSet<String>();
    resultColumns.add(predictionName);
    resultColumns.add(probabilityName);
    resultColumns.add(rawPredictionName);

    description.setProbabilityKey(probabilityName);
    description.setRawPredictionKey(rawPredictionName);
    description.setOutputKeys(resultColumns);

    return description;
}

Example #3

Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0

5 votes

/**
 * Trains a decision tree classifier on a random 70% split of the LIBSVM fixture, exports and
 * re-imports it as a {@code DecisionTreeTransformer}, and checks that the imported transformer
 * yields the same prediction and raw prediction as Spark for every held-out row.
 */
@Test
public void testDecisionTreeClassificationPrediction() {
    // Read the LIBSVM-formatted fixture.
    final String fixturePath = "src/test/resources/classification_test.libsvm";
    final Dataset<Row> dataset = spark.read().format("libsvm").load(fixturePath);

    // 70% of the rows train the model; the remaining 30% are held out for comparison.
    final Dataset<Row>[] partitions = dataset.randomSplit(new double[]{0.7, 0.3});
    final Dataset<Row> trainingSet = partitions[0];
    final Dataset<Row> heldOutSet = partitions[1];

    // Fit the tree with default parameters.
    final DecisionTreeClassificationModel classifierModel = new DecisionTreeClassifier().fit(trainingSet);
    trainingSet.printSchema();

    // Spark's own output on the held-out rows: features, prediction, rawPrediction.
    final List<Row> sparkRows = classifierModel
            .transform(heldOutSet)
            .select("features", "prediction","rawPrediction")
            .collectAsList();

    // Round-trip the model through the exporter and importer.
    final byte[] serializedModel = ModelExporter.export(classifierModel);
    final DecisionTreeTransformer transformer =
            (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(serializedModel);

    // The imported transformer must agree with Spark row by row.
    for (final Row sparkRow : sparkRows) {
        final double[] sparkRawPrediction = ((DenseVector) sparkRow.get(2)).toArray();

        final Map<String, Object> record = new HashMap<>();
        record.put("features", ((SparseVector) sparkRow.get(0)).toArray());
        transformer.transform(record);

        System.out.println(record);
        System.out.println(record.get("prediction"));

        assertEquals((double) record.get("prediction"), (double) sparkRow.get(1), EPSILON);
        assertArrayEquals((double[]) record.get("rawPrediction"), sparkRawPrediction, EPSILON);
    }
}

Example #4

Source File: RandomForestClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

5 votes

/**
 * Builds a {@link RandomForestModelInfo} describing the given Spark random forest classifier.
 *
 * <p>Copies the class and feature counts, the per-tree weights, one
 * {@code DecisionTreeModelInfo} per tree (via {@code DECISION_TREE_ADAPTER}), and the input and
 * output column names onto the result. The regression flag is always set to {@code false}.
 *
 * @param sparkRfModel the fitted Spark random forest classifier to describe
 * @param df forwarded unchanged to the per-tree adapter
 * @return a newly created info object populated from {@code sparkRfModel}
 */
@Override
RandomForestModelInfo getModelInfo(final RandomForestClassificationModel sparkRfModel, final DataFrame df) {
    final RandomForestModelInfo forestInfo = new RandomForestModelInfo();

    forestInfo.setNumClasses(sparkRfModel.numClasses());
    forestInfo.setNumFeatures(sparkRfModel.numFeatures());
    // This adapter handles classification models only.
    forestInfo.setRegression(false);

    // Box the per-tree weights into a list.
    final List<Double> weights = new ArrayList<Double>();
    for (double weight : sparkRfModel.treeWeights()) {
        weights.add(weight);
    }
    forestInfo.setTreeWeights(weights);

    // Adapt every member tree with the shared decision tree adapter.
    final List<DecisionTreeModelInfo> treeInfos = new ArrayList<>();
    for (DecisionTreeModel tree : sparkRfModel.trees()) {
        final DecisionTreeClassificationModel classificationTree = (DecisionTreeClassificationModel) tree;
        treeInfos.add(DECISION_TREE_ADAPTER.getModelInfo(classificationTree, df));
    }
    forestInfo.setTrees(treeInfos);

    // Input columns in insertion order: features, then label.
    final Set<String> consumedColumns = new LinkedHashSet<String>();
    consumedColumns.add(sparkRfModel.getFeaturesCol());
    consumedColumns.add(sparkRfModel.getLabelCol());
    forestInfo.setInputKeys(consumedColumns);

    // Output column names are read once and reused for the key set and the dedicated setters.
    final String predictionColumn = sparkRfModel.getPredictionCol();
    final String probabilityColumn = sparkRfModel.getProbabilityCol();
    final String rawPredictionColumn = sparkRfModel.getRawPredictionCol();

    final Set<String> producedColumns = new LinkedHashSet<String>();
    producedColumns.add(predictionColumn);
    producedColumns.add(probabilityColumn);
    producedColumns.add(rawPredictionColumn);

    forestInfo.setProbabilityKey(probabilityColumn);
    forestInfo.setRawPredictionKey(rawPredictionColumn);
    forestInfo.setOutputKeys(producedColumns);

    return forestInfo;
}

Example #5

Source File: JavaDecisionTreeClassificationExample.java From SparkDemo with MIT License

4 votes

/**
 * Runs the decision tree classification example end to end: loads the sample LIBSVM data,
 * indexes labels and features, trains a decision tree inside a pipeline on a 70/30 split,
 * prints five sample predictions, the test error, and the learned tree, then stops Spark.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  SparkSession spark = SparkSession.builder()
    .appName("JavaDecisionTreeClassificationExample")
    .getOrCreate();

  // $example on$
  // Read the LIBSVM-formatted sample file into a DataFrame.
  Dataset<Row> dataset = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");

  // Label indexer: fitted on the full dataset so every label gets an index,
  // and so the label column carries metadata.
  StringIndexer labelIndexerSpec = new StringIndexer()
    .setInputCol("label")
    .setOutputCol("indexedLabel");
  StringIndexerModel labelIndexer = labelIndexerSpec.fit(dataset);

  // Feature indexer: detects categorical features automatically.
  // Features with > 4 distinct values are treated as continuous.
  VectorIndexer featureIndexerSpec = new VectorIndexer()
    .setInputCol("features")
    .setOutputCol("indexedFeatures")
    .setMaxCategories(4);
  VectorIndexerModel featureIndexer = featureIndexerSpec.fit(dataset);

  // 70% of the rows for training, 30% held out for testing.
  double[] splitWeights = new double[]{0.7, 0.3};
  Dataset<Row>[] partitions = dataset.randomSplit(splitWeights);
  Dataset<Row> trainingSet = partitions[0];
  Dataset<Row> testSet = partitions[1];

  // The decision tree estimator works on the indexed columns.
  DecisionTreeClassifier treeClassifier = new DecisionTreeClassifier()
    .setLabelCol("indexedLabel")
    .setFeaturesCol("indexedFeatures");

  // Maps predicted indices back to the original label strings.
  IndexToString labelConverter = new IndexToString()
    .setInputCol("prediction")
    .setOutputCol("predictedLabel")
    .setLabels(labelIndexer.labels());

  // Pipeline order: label indexer, feature indexer, tree, label converter.
  PipelineStage[] stages =
    new PipelineStage[]{labelIndexer, featureIndexer, treeClassifier, labelConverter};
  Pipeline pipeline = new Pipeline().setStages(stages);

  // Fitting the pipeline trains the tree and also runs the indexers.
  PipelineModel fittedPipeline = pipeline.fit(trainingSet);

  // Score the held-out rows.
  Dataset<Row> predictions = fittedPipeline.transform(testSet);

  // Show a handful of rows.
  predictions.select("predictedLabel", "label", "features").show(5);

  // Compare predicted index with true index and report the test error.
  MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
    .setLabelCol("indexedLabel")
    .setPredictionCol("prediction")
    .setMetricName("accuracy");
  double accuracy = evaluator.evaluate(predictions);
  double testError = 1.0 - accuracy;
  System.out.println("Test Error = " + testError);

  // Stage index 2 is the decision tree (third entry of the stages array above).
  DecisionTreeClassificationModel treeModel =
    (DecisionTreeClassificationModel) fittedPipeline.stages()[2];
  System.out.println("Learned classification tree model:\n" + treeModel.toDebugString());
  // $example off$

  spark.stop();
}

Example #6

Source File: DecisionTreeClassificationModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

4 votes

/**
 * Creates a converter for the given decision tree classification model.
 *
 * <p>The model is handed straight to the superclass constructor; no other work is done here.
 *
 * @param model the Spark decision tree classification model to convert
 */
public DecisionTreeClassificationModelConverter(DecisionTreeClassificationModel model){
	super(model);
}

Example #7

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Returns the class object for {@link DecisionTreeClassificationModel}.
 *
 * <p>NOTE(review): presumably this identifies the Spark model type this adapter handles;
 * confirm against the overridden declaration.
 *
 * @return {@code DecisionTreeClassificationModel.class}
 */
@Override
public Class<DecisionTreeClassificationModel> getSource() {
    return DecisionTreeClassificationModel.class;
}

Example #8

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Returns the class object for {@link DecisionTreeClassificationModel}.
 *
 * <p>NOTE(review): presumably this identifies the Spark model type this adapter handles;
 * confirm against the overridden declaration.
 *
 * @return {@code DecisionTreeClassificationModel.class}
 */
@Override
public Class<DecisionTreeClassificationModel> getSource() {
    return DecisionTreeClassificationModel.class;
}

Example #9

Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Trains a decision tree classifier on string-indexed labels, exports and re-imports it as a
 * {@code DecisionTreeTransformer}, and checks that the imported transformer reproduces Spark's
 * prediction and raw prediction for every held-out row.
 */
@Test
public void testDecisionTreeClassificationRawPrediction() {
    // Read the LIBSVM-formatted fixture.
    final DataFrame rawData = sqlContext.read().format("libsvm").load("src/test/resources/classification_test.libsvm");

    // Index the label column into "labelIndex".
    final StringIndexerModel stringIndexerModel = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex")
            .fit(rawData);
    final DataFrame indexedData = stringIndexerModel.transform(rawData);

    // 70% of the rows train the model; the remaining 30% are held out for comparison.
    final DataFrame[] partitions = indexedData.randomSplit(new double[]{0.7, 0.3});
    final DataFrame trainingSet = partitions[0];
    final DataFrame heldOutSet = partitions[1];

    // Fit the tree with explicit column names.
    final DecisionTreeClassifier classifier = new DecisionTreeClassifier()
            .setLabelCol("labelIndex")
            .setFeaturesCol("features")
            .setRawPredictionCol("rawPrediction")
            .setPredictionCol("prediction");
    final DecisionTreeClassificationModel classificationModel = classifier.fit(trainingSet);

    // Round-trip the model through the exporter and importer.
    final byte[] serializedModel = ModelExporter.export(classificationModel, null);
    final Transformer transformer =
            (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(serializedModel);

    // Spark's own output on the held-out rows: features, prediction, rawPrediction.
    final Row[] sparkRows = classificationModel
            .transform(heldOutSet)
            .select("features", "prediction", "rawPrediction")
            .collect();

    // The imported transformer must agree with Spark row by row.
    for (final Row sparkRow : sparkRows) {
        final Vector features = (Vector) sparkRow.get(0);
        final double sparkPrediction = sparkRow.getDouble(1);
        final double[] sparkRawPrediction = ((Vector) sparkRow.get(2)).toArray();

        // Features go in under the transformer's first input key; the prediction is read
        // back from its first output key.
        final Map<String, Object> record = new HashMap<>();
        record.put(transformer.getInputKeys().iterator().next(), features.toArray());
        transformer.transform(record);

        final double importedPrediction = (double) record.get(transformer.getOutputKeys().iterator().next());
        final double[] importedRawPrediction = (double[]) record.get("rawPrediction");

        assertEquals(sparkPrediction, importedPrediction, EPSILON);
        assertArrayEquals(sparkRawPrediction, importedRawPrediction, EPSILON);
    }
}