org.dmg.pmml.ScoreDistribution Java Examples

The following examples show how to use org.dmg.pmml.ScoreDistribution. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ScoreDistributionManager.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates one interned {@link ScoreDistribution} per value of the label.
 *
 * @param categoricalLabel The label; its values become the score distribution values.
 * @param recordCounts The record counts, read at the same index as the corresponding label value.
 * @return A new list of score distributions, in label value order.
 */
public List<ScoreDistribution> createScoreDistribution(CategoricalLabel categoricalLabel, double[] recordCounts){
	List<ScoreDistribution> scoreDistributions = new ArrayList<>();

	int index = 0;

	while(index < categoricalLabel.size()){
		Object labelValue = categoricalLabel.getValue(index);
		double count = recordCounts[index];

		ScoreDistribution candidate = new InternableScoreDistribution()
			.setValue(labelValue)
			.setRecordCount(count);

		// Store whatever instance intern(...) hands back, not necessarily the one just built
		scoreDistributions.add(intern(candidate));

		index++;
	}

	return scoreDistributions;
}
 
Example #2
Source File: ScoreDistributionInternerTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks that two leaves built from equal-content score distributions hold
 * distinct instances before {@link ScoreDistributionInterner} is applied,
 * and the very same instances afterwards.
 */
@Test
public void intern(){
	Node firstLeaf = createNode(new ScoreDistribution("event", 0.33d));
	Node secondLeaf = createNode(new ScoreDistribution("event", 0.33d));

	Node parent = new ComplexNode(True.INSTANCE)
		.addNodes(firstLeaf, secondLeaf);

	TreeModel treeModel = new TreeModel();
	treeModel.setNode(parent);

	// Before interning: distinct objects at every position
	for(int position = 0; position < 2; position++){
		ScoreDistribution fromFirst = (firstLeaf.getScoreDistributions()).get(position);
		ScoreDistribution fromSecond = (secondLeaf.getScoreDistributions()).get(position);

		assertNotSame(fromFirst, fromSecond);
	}

	ScoreDistributionInterner interner = new ScoreDistributionInterner();
	interner.applyTo(treeModel);

	// After interning: the same object at every position
	for(int position = 0; position < 2; position++){
		ScoreDistribution fromFirst = (firstLeaf.getScoreDistributions()).get(position);
		ScoreDistribution fromSecond = (secondLeaf.getScoreDistributions()).get(position);

		assertSame(fromFirst, fromSecond);
	}
}
 
Example #3
Source File: RDFUpdateIT.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively asserts the expected shape of a tree node.
 *
 * <p>A node with score distributions is treated as a leaf: it must carry one or two
 * distributions, with confidence exactly 1.0 when there is one and strictly between
 * 0.0 and 1.0 when there are two. Any other node must have exactly two children,
 * the first with ID {@code id + "+"} and the second with ID {@code id + "-"} and a
 * {@link True} predicate, whose (integer) record counts sum to the node's own.
 *
 * @param node the node to check, together with everything below it
 */
private static void checkNode(Node node) {
  assertNotNull(node.getId());

  if (node.hasScoreDistributions()) {
    // Leaf
    List<ScoreDistribution> distributions = node.getScoreDistributions();
    int distributionCount = distributions.size();
    assertRange(distributionCount, 1, 2);
    ScoreDistribution head = distributions.get(0);
    if (distributionCount == 1) {
      assertEquals(1.0, head.getConfidence().doubleValue());
      return;
    }
    assertGreater(head.getConfidence().doubleValue(), 0.0);
    assertLess(head.getConfidence().doubleValue(), 1.0);
    ScoreDistribution tail = distributions.get(1);
    assertGreater(tail.getConfidence().doubleValue(), 0.0);
    assertLess(tail.getConfidence().doubleValue(), 1.0);
    return;
  }

  // Non-leaf
  List<Node> children = node.getNodes();
  assertEquals(2, children.size());
  Node positiveChild = children.get(0);
  Node negativeChild = children.get(1);
  assertInstanceOf(negativeChild.getPredicate(), True.class);

  int parentCount = node.getRecordCount().intValue();
  int childCountSum =
      negativeChild.getRecordCount().intValue() + positiveChild.getRecordCount().intValue();
  assertEquals(parentCount, childCountSum);

  assertEquals(node.getId() + "+", positiveChild.getId());
  assertEquals(node.getId() + "-", negativeChild.getId());

  checkNode(positiveChild);
  checkNode(negativeChild);
}
 
Example #4
Source File: ClassifierNode.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the live list of score distributions, creating and storing an
 * empty list on first access.
 *
 * @return The backing list held by this node; never {@code null}.
 */
@Override
public List<ScoreDistribution> getScoreDistributions(){
	List<ScoreDistribution> current = this.scoreDistributions;

	if(current == null){
		current = new ArrayList<>();

		this.scoreDistributions = current;
	}

	return current;
}
 
Example #5
Source File: TargetCategoryParser.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Replaces the value of the score distribution with the result of
 * {@code parseTargetValue(...)} applied to it.
 *
 * @param scoreDistribution The element being visited.
 * @return The action returned by the superclass visit.
 * @throws MissingAttributeException If the score distribution has no value.
 */
@Override
public VisitorAction visit(ScoreDistribution scoreDistribution){
	Object rawValue = scoreDistribution.getValue();

	if(rawValue != null){
		Object parsedValue = parseTargetValue(rawValue);

		scoreDistribution.setValue(parsedValue);

		return super.visit(scoreDistribution);
	}

	throw new MissingAttributeException(scoreDistribution, PMMLAttributes.SCOREDISTRIBUTION_VALUE);
}
 
Example #6
Source File: ScoreDistributionInterner.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Replaces, in place, every score distribution of the node with the
 * instance returned by {@code intern(...)}.
 *
 * @param node The node being visited.
 * @return The action returned by the superclass visit.
 */
@Override
public VisitorAction visit(Node node){

	if(node.hasScoreDistributions()){
		ListIterator<ScoreDistribution> cursor = (node.getScoreDistributions()).listIterator();

		while(cursor.hasNext()){
			ScoreDistribution current = cursor.next();

			// ListIterator#set overwrites the element just returned by next()
			cursor.set(intern(current));
		}
	}

	return super.visit(node);
}
 
Example #7
Source File: ScoreDistributionInterner.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the cached instance for the score distribution.
 *
 * <p>
 * A {@code null} argument, or a score distribution that has extensions,
 * is returned as-is without consulting the cache.
 * </p>
 */
private ScoreDistribution intern(ScoreDistribution scoreDistribution){
	boolean internable = (scoreDistribution != null) && !scoreDistribution.hasExtensions();

	return internable ? this.cache.intern(scoreDistribution) : scoreDistribution;
}
 
Example #8
Source File: ScoreDistributionInternerTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a node holding two score distributions: the given one, followed by
 * a {@code "no-event"} one whose record count is one minus the record count
 * of the given one.
 */
static
private Node createNode(ScoreDistribution event){
	double eventCount = NumberUtil.asDouble(event.getRecordCount());

	ScoreDistribution complement = new ScoreDistribution("no-event", 1d - eventCount);

	return new ComplexNode()
		.addScoreDistributions(event, complement);
}
 
Example #9
Source File: ScoreDistributionManager.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Passes the score distribution through this manager's interner.
 *
 * @param scoreDistribution The score distribution to intern.
 * @return The instance returned by the interner.
 */
public ScoreDistribution intern(ScoreDistribution scoreDistribution){
	ScoreDistribution canonical = this.interner.intern(scoreDistribution);

	return canonical;
}
 
Example #10
Source File: DummyClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes this dummy classifier as a {@link TreeModel} with a single, always-matching root node.
 *
 * <p>
 * The root node is scored with the class selected by the strategy, and carries one
 * {@link ScoreDistribution} per class, built from the class value and its probability:
 * </p>
 * <ul>
 *   <li>{@code "constant"} - the configured constant class gets 1, every other class gets 0.</li>
 *   <li>{@code "most_frequent"} - the class with the largest prior gets 1, every other class gets 0.</li>
 *   <li>{@code "prior"} - the class with the largest prior is selected, and the priors are used as probabilities.</li>
 * </ul>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 * @return The tree model, with a probability output attached.
 * @throws IllegalArgumentException If the strategy is not one of the above,
 * or if the strategy is {@code "constant"} and the constant is not among the classes.
 */
@Override
public TreeModel encodeModel(Schema schema){
	List<?> classes = getClasses();
	List<? extends Number> classPrior = getClassPrior();
	Object constant = getConstant();
	String strategy = getStrategy();

	ClassDictUtil.checkSize(classes, classPrior);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int index;

	double[] probabilities;

	switch(strategy){
		case "constant":
			{
				index = classes.indexOf(constant);

				// Fail with a descriptive message instead of an ArrayIndexOutOfBoundsException below
				if(index < 0){
					throw new IllegalArgumentException("Constant " + constant + " is not among the classes " + classes);
				}

				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "most_frequent":
			{
				// Raw cast, because Number itself is not Comparable
				index = classPrior.indexOf(Collections.max((List)classPrior));

				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "prior":
			{
				// Raw cast, because Number itself is not Comparable
				index = classPrior.indexOf(Collections.max((List)classPrior));

				probabilities = Doubles.toArray(classPrior);
			}
			break;
		default:
			throw new IllegalArgumentException(strategy);
	}

	Node root = new ClassifierNode(ValueUtil.asString(classes.get(index)), True.INSTANCE);

	List<ScoreDistribution> scoreDistributions = root.getScoreDistributions();

	for(int i = 0; i < classes.size(); i++){
		ScoreDistribution scoreDistribution = new ScoreDistribution(ValueUtil.asString(classes.get(i)), probabilities[i]);

		scoreDistributions.add(scoreDistribution);
	}

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), root)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return treeModel;
}
 
Example #11
Source File: RDFPMMLUtilsTest.java    From oryx with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a small hand-made PMML classification model for tests.
 *
 * <p>The model predicts the categorical field "fruit" from the categorical field "color"
 * with a three-node tree: root "r", positive child "r+" (color not in {"red"}, scored
 * "banana") and negative child "r-" (always true, scored "apple").
 *
 * @param numTrees when greater than 1, that many segments (all referring to the same tree
 *  instance) are wrapped in a weighted-majority-vote {@link MiningModel}; otherwise the
 *  single {@link TreeModel} is added to the PMML directly
 * @return the PMML document containing the model
 */
private static PMML buildDummyClassificationModel(int numTrees) {
  PMML pmml = PMMLUtils.buildSkeletonPMML();

  // Data dictionary: predictor "color", then target "fruit"
  List<DataField> dictionaryFields = new ArrayList<>();
  DataField colorField =
      new DataField(FieldName.create("color"), OpType.CATEGORICAL, DataType.STRING);
  colorField.addValues(new Value("yellow"), new Value("red"));
  dictionaryFields.add(colorField);
  DataField fruitField =
      new DataField(FieldName.create("fruit"), OpType.CATEGORICAL, DataType.STRING);
  fruitField.addValues(new Value("banana"), new Value("apple"));
  dictionaryFields.add(fruitField);
  pmml.setDataDictionary(
      new DataDictionary(dictionaryFields).setNumberOfFields(dictionaryFields.size()));

  // Mining schema: "color" is active, "fruit" is predicted
  List<MiningField> schemaFields = new ArrayList<>();
  schemaFields.add(new MiningField(FieldName.create("color"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.ACTIVE)
      .setImportance(0.5));
  schemaFields.add(new MiningField(FieldName.create("fruit"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.PREDICTED));
  MiningSchema miningSchema = new MiningSchema(schemaFields);

  // Tree: the root's record count is split evenly between the two leaves
  double totalCount = 2.0;
  double leafCount = totalCount / 2;

  Node root =
    new ComplexNode().setId("r").setRecordCount(totalCount).setPredicate(new True());

  Node negativeLeaf =
      new ComplexNode().setId("r-").setRecordCount(leafCount).setPredicate(new True());
  negativeLeaf.addScoreDistributions(new ScoreDistribution("apple", leafCount));

  Node positiveLeaf = new ComplexNode().setId("r+").setRecordCount(leafCount)
      .setPredicate(new SimpleSetPredicate(FieldName.create("color"),
                                           SimpleSetPredicate.BooleanOperator.IS_NOT_IN,
                                           new Array(Array.Type.STRING, "red")));
  positiveLeaf.addScoreDistributions(new ScoreDistribution("banana", leafCount));

  // The positive child goes first
  root.addNodes(positiveLeaf, negativeLeaf);

  TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, miningSchema, root)
      .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
      .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);

  if (numTrees <= 1) {
    pmml.addModels(treeModel);
    return pmml;
  }

  // Forest: every segment refers to the same tree, with equal weight
  List<Segment> segments = new ArrayList<>();
  for (int treeIndex = 0; treeIndex < numTrees; treeIndex++) {
    segments.add(new Segment()
        .setId(Integer.toString(treeIndex))
        .setPredicate(new True())
        .setModel(treeModel)
        .setWeight(1.0));
  }
  MiningModel miningModel = new MiningModel(MiningFunction.CLASSIFICATION, miningSchema);
  miningModel.setSegmentation(
      new Segmentation(Segmentation.MultipleModelMethod.WEIGHTED_MAJORITY_VOTE, segments));
  pmml.addModels(miningModel);

  return pmml;
}
 
Example #12
Source File: RDFUpdate.java    From oryx with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an MLlib decision tree into a PMML {@link TreeModel}.
 *
 * <p>The MLlib tree is walked breadth-first using two FIFO queues that are kept in step:
 * one holds the MLlib node together with the split that led to it from its parent, the other
 * holds the PMML node being filled in. The root gets ID "r"; the children of a node get the
 * parent's ID suffixed with "+" (right / positive, carries the split predicate) and
 * "-" (left / negative).
 *
 * <p>Leaves of a classification tree receive one {@link ScoreDistribution} per target value
 * with a positive record count; leaves of a regression tree receive the predicted value
 * as their score.
 *
 * @param dtModel the MLlib model to convert
 * @param categoricalValueEncodings encodings used to build predicates and to map the encoded
 *  target back to its value
 * @param nodeIDCounts count per MLlib node ID; used as the PMML record count and to choose
 *  the default child
 * @return tree model with binary splits and the default-child missing value strategy
 * @throws IllegalStateException if the model's task (classification or not) disagrees with
 *  the input schema, or if a predicted probability lies outside [0,1]
 */
private TreeModel toTreeModel(DecisionTreeModel dtModel,
                              CategoricalValueEncodings categoricalValueEncodings,
                              IntLongMap nodeIDCounts) {

  boolean classificationTask = dtModel.algo().equals(Algo.Classification());
  Preconditions.checkState(classificationTask == inputSchema.isClassification());

  Node root = new ComplexNode();
  root.setId("r");

  Queue<Node> modelNodes = new ArrayDeque<>();
  modelNodes.add(root);

  Queue<Pair<org.apache.spark.mllib.tree.model.Node,Split>> treeNodes = new ArrayDeque<>();
  treeNodes.add(new Pair<>(dtModel.topNode(), null));

  while (!treeNodes.isEmpty()) {

    // Both queues are filled in the same order, so these two always correspond
    Pair<org.apache.spark.mllib.tree.model.Node,Split> treeNodePredicate = treeNodes.remove();
    Node modelNode = modelNodes.remove();

    // This is the decision that got us here from the parent, if any;
    // not the predicate at this node
    Predicate predicate = buildPredicate(treeNodePredicate.getSecond(),
                                         categoricalValueEncodings);
    modelNode.setPredicate(predicate);

    org.apache.spark.mllib.tree.model.Node treeNode = treeNodePredicate.getFirst();
    long nodeCount = nodeIDCounts.get(treeNode.id());
    modelNode.setRecordCount((double) nodeCount);

    if (treeNode.isLeaf()) {

      Predict prediction = treeNode.predict();
      if (classificationTask) {
        // Only a classification prediction is an encoded category index; a regression
        // prediction must not be truncated to an int
        int targetEncodedValue = (int) prediction.predict();
        Map<Integer,String> targetEncodingToValue =
            categoricalValueEncodings.getEncodingValueMap(inputSchema.getTargetFeatureIndex());
        double predictedProbability = prediction.prob();
        Preconditions.checkState(predictedProbability >= 0.0 && predictedProbability <= 1.0);
        // Not sure how nodeCount == 0 can happen but it does in the MLlib model
        long effectiveNodeCount = Math.max(1, nodeCount);
        // Problem: MLlib only gives a predicted class and its probability, and no distribution
        // over the rest. Infer that the rest of the probability is evenly distributed.
        double restProbability = (1.0 - predictedProbability) / (targetEncodingToValue.size() - 1);

        targetEncodingToValue.forEach((encodedValue, value) -> {
          double probability = encodedValue == targetEncodedValue ? predictedProbability : restProbability;
          // Yes, recordCount may be fractional; it's a relative indicator
          double recordCount = probability * effectiveNodeCount;
          if (recordCount > 0.0) {
            ScoreDistribution distribution = new ScoreDistribution(value, recordCount);
            // Not "confident" enough in the "probability" to call it one
            distribution.setConfidence(probability);
            modelNode.addScoreDistributions(distribution);
          }
        });
      } else {
        // Keep the full double prediction as the regression score
        modelNode.setScore(Double.toString(prediction.predict()));
      }

    } else {

      Split split = treeNode.split().get();

      Node positiveModelNode = new ComplexNode().setId(modelNode.getId() + "+");
      Node negativeModelNode = new ComplexNode().setId(modelNode.getId() + "-");
      modelNode.addNodes(positiveModelNode, negativeModelNode);

      org.apache.spark.mllib.tree.model.Node rightTreeNode = treeNode.rightNode().get();
      org.apache.spark.mllib.tree.model.Node leftTreeNode = treeNode.leftNode().get();

      // Default to whichever child has the strictly larger count; ties go to the left child
      boolean defaultRight = nodeIDCounts.get(rightTreeNode.id()) > nodeIDCounts.get(leftTreeNode.id());
      modelNode.setDefaultChild(defaultRight ? positiveModelNode.getId() : negativeModelNode.getId());

      // Right node is "positive", so carries the predicate. It must evaluate first
      // and therefore come first in the tree
      modelNodes.add(positiveModelNode);
      modelNodes.add(negativeModelNode);
      treeNodes.add(new Pair<>(rightTreeNode, split));
      treeNodes.add(new Pair<>(leftTreeNode, null));

    }

  }

  return new TreeModel()
      .setNode(root)
      .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
      .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);
}
 
Example #13
Source File: Node.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Unsupported in this implementation.
 *
 * <p>
 * NOTE(review): presumably node types that can hold score distributions override this method - confirm against the subclasses.
 * </p>
 *
 * @return Never returns normally.
 * @throws UnsupportedOperationException Always.
 */
public List<ScoreDistribution> getScoreDistributions(){
	throw new UnsupportedOperationException();
}
 
Example #14
Source File: Node.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Appends the given score distributions, in order, to the list returned by
 * {@link #getScoreDistributions()}.
 *
 * @param scoreDistributions The score distributions to append.
 * @return This node, for call chaining.
 */
public Node addScoreDistributions(ScoreDistribution... scoreDistributions){
	List<ScoreDistribution> target = getScoreDistributions();

	List<ScoreDistribution> additions = Arrays.asList(scoreDistributions);

	target.addAll(additions);

	return this;
}
 
Example #15
Source File: ScoreDistributionInterner.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds the key for a score distribution from its value, record count,
 * probability and confidence, in that order.
 *
 * @param scoreDistribution The score distribution to create a key for.
 * @return A new key wrapping the four attribute values.
 */
@Override
public ElementKey createKey(ScoreDistribution scoreDistribution){
	return new ElementKey(new Object[]{
		scoreDistribution.getValue(),
		scoreDistribution.getRecordCount(),
		scoreDistribution.getProbability(),
		scoreDistribution.getConfidence()
	});
}