Java Code Examples for org.dmg.pmml.tree.Node#addNodes()

The following examples show how to use org.dmg.pmml.tree.Node#addNodes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RDFPMMLUtilsTest.java    From oryx with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a tiny classification PMML fixture that predicts {@code fruit} from {@code color}.
 *
 * <p>The tree consists of a root {@code r} and two leaves: {@code r+} matches when color is not
 * in {"red"} and carries a "banana" score distribution; {@code r-} always matches and carries an
 * "apple" score distribution. When {@code numTrees} is greater than one, the same tree instance
 * is placed into that many equally weighted segments of a weighted-majority-vote
 * {@code MiningModel}; otherwise the tree itself is added to the document.
 *
 * @param numTrees number of segments to generate; 1 or less yields a bare tree model
 * @return the populated PMML document
 */
private static PMML buildDummyClassificationModel(int numTrees) {
  PMML document = PMMLUtils.buildSkeletonPMML();

  // Data dictionary: one categorical predictor and one categorical target.
  DataField colorField =
      new DataField(FieldName.create("color"), OpType.CATEGORICAL, DataType.STRING);
  colorField.addValues(new Value("yellow"), new Value("red"));
  DataField fruitField =
      new DataField(FieldName.create("fruit"), OpType.CATEGORICAL, DataType.STRING);
  fruitField.addValues(new Value("banana"), new Value("apple"));

  List<DataField> fields = new ArrayList<>();
  fields.add(colorField);
  fields.add(fruitField);
  DataDictionary dictionary = new DataDictionary(fields);
  dictionary.setNumberOfFields(fields.size());
  document.setDataDictionary(dictionary);

  // Mining schema: "color" is the active input, "fruit" is the predicted output.
  MiningField colorMiningField = new MiningField(FieldName.create("color"));
  colorMiningField.setOpType(OpType.CATEGORICAL);
  colorMiningField.setUsageType(MiningField.UsageType.ACTIVE);
  colorMiningField.setImportance(0.5);
  MiningField fruitMiningField = new MiningField(FieldName.create("fruit"));
  fruitMiningField.setOpType(OpType.CATEGORICAL);
  fruitMiningField.setUsageType(MiningField.UsageType.PREDICTED);

  List<MiningField> schemaFields = new ArrayList<>();
  schemaFields.add(colorMiningField);
  schemaFields.add(fruitMiningField);
  MiningSchema miningSchema = new MiningSchema(schemaFields);

  // Tree: the root's record count is split evenly between the two leaves.
  double totalCount = 2.0;
  double perLeafCount = totalCount / 2;

  Node root = new ComplexNode();
  root.setId("r");
  root.setRecordCount(totalCount);
  root.setPredicate(new True());

  Node negativeLeaf = new ComplexNode();
  negativeLeaf.setId("r-");
  negativeLeaf.setRecordCount(perLeafCount);
  negativeLeaf.setPredicate(new True());
  negativeLeaf.addScoreDistributions(new ScoreDistribution("apple", perLeafCount));

  Node positiveLeaf = new ComplexNode();
  positiveLeaf.setId("r+");
  positiveLeaf.setRecordCount(perLeafCount);
  positiveLeaf.setPredicate(
      new SimpleSetPredicate(FieldName.create("color"),
                             SimpleSetPredicate.BooleanOperator.IS_NOT_IN,
                             new Array(Array.Type.STRING, "red")));
  positiveLeaf.addScoreDistributions(new ScoreDistribution("banana", perLeafCount));

  // The predicate-carrying child goes first; the always-true child follows it.
  root.addNodes(positiveLeaf, negativeLeaf);

  TreeModel tree = new TreeModel(MiningFunction.CLASSIFICATION, miningSchema, root);
  tree.setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT);
  tree.setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);

  if (numTrees <= 1) {
    document.addModels(tree);
    return document;
  }

  // Forest case: every segment refers to the very same tree instance.
  MiningModel ensemble = new MiningModel(MiningFunction.CLASSIFICATION, miningSchema);
  List<Segment> segments = new ArrayList<>();
  for (int segmentIndex = 0; segmentIndex < numTrees; segmentIndex++) {
    Segment segment = new Segment();
    segment.setId(Integer.toString(segmentIndex));
    segment.setPredicate(new True());
    segment.setModel(tree);
    segment.setWeight(1.0);
    segments.add(segment);
  }
  ensemble.setSegmentation(
      new Segmentation(Segmentation.MultipleModelMethod.WEIGHTED_MAJORITY_VOTE, segments));
  document.addModels(ensemble);
  return document;
}
 
Example 2
Source File: RDFPMMLUtilsTest.java    From oryx with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a tiny regression PMML fixture that predicts {@code bar} from {@code foo}.
 *
 * <p>The tree consists of a root {@code r} and two leaves: {@code r+} matches when
 * {@code foo > 3.14} and scores "2.0"; {@code r-} always matches and scores "-2.0".
 *
 * @return the populated PMML document containing a single regression tree model
 */
public static PMML buildDummyRegressionModel() {
  PMML document = PMMLUtils.buildSkeletonPMML();

  // Data dictionary: two continuous double fields.
  List<DataField> fields = new ArrayList<>();
  DataField fooField = new DataField(FieldName.create("foo"), OpType.CONTINUOUS, DataType.DOUBLE);
  fields.add(fooField);
  DataField barField = new DataField(FieldName.create("bar"), OpType.CONTINUOUS, DataType.DOUBLE);
  fields.add(barField);
  DataDictionary dictionary = new DataDictionary(fields);
  dictionary.setNumberOfFields(fields.size());
  document.setDataDictionary(dictionary);

  // Mining schema: "foo" is the active input, "bar" is the predicted output.
  MiningField fooMiningField = new MiningField(FieldName.create("foo"));
  fooMiningField.setOpType(OpType.CONTINUOUS);
  fooMiningField.setUsageType(MiningField.UsageType.ACTIVE);
  fooMiningField.setImportance(0.5);
  MiningField barMiningField = new MiningField(FieldName.create("bar"));
  barMiningField.setOpType(OpType.CONTINUOUS);
  barMiningField.setUsageType(MiningField.UsageType.PREDICTED);

  List<MiningField> schemaFields = new ArrayList<>();
  schemaFields.add(fooMiningField);
  schemaFields.add(barMiningField);
  MiningSchema miningSchema = new MiningSchema(schemaFields);

  // Tree: the root's record count is split evenly between the two leaves.
  double totalCount = 2.0;
  double perLeafCount = totalCount / 2;

  Node root = new ComplexNode();
  root.setId("r");
  root.setRecordCount(totalCount);
  root.setPredicate(new True());

  Node negativeLeaf = new ComplexNode();
  negativeLeaf.setId("r-");
  negativeLeaf.setRecordCount(perLeafCount);
  negativeLeaf.setPredicate(new True());
  negativeLeaf.setScore("-2.0");

  Node positiveLeaf = new ComplexNode();
  positiveLeaf.setId("r+");
  positiveLeaf.setRecordCount(perLeafCount);
  positiveLeaf.setPredicate(
      new SimplePredicate(FieldName.create("foo"),
                          SimplePredicate.Operator.GREATER_THAN,
                          "3.14"));
  positiveLeaf.setScore("2.0");

  // The predicate-carrying child goes first; the always-true child follows it.
  root.addNodes(positiveLeaf, negativeLeaf);

  TreeModel tree = new TreeModel(MiningFunction.REGRESSION, miningSchema, root);
  tree.setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT);
  tree.setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);
  // The schema was already handed to the constructor; this explicit call mirrors the original.
  tree.setMiningSchema(miningSchema);

  document.addModels(tree);
  return document;
}
 
Example 3
Source File: RDFUpdate.java    From oryx with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an MLlib {@code DecisionTreeModel} into a PMML {@code TreeModel}.
 *
 * <p>The MLlib tree is walked breadth-first. Two FIFO queues are advanced in lock step: one holds
 * the PMML nodes being filled in, the other holds the matching MLlib node paired with the split
 * that led to it from its parent ({@code null} for the root and for every "negative" child).
 *
 * <p>For classification leaves, a score distribution is emitted per target value. For regression
 * leaves, the predicted value is written as the node score without truncation.
 *
 * <p>Only the root node, split characteristic and missing-value strategy are set on the returned
 * model here.
 *
 * @param dtModel the trained MLlib decision tree
 * @param categoricalValueEncodings encodings passed to {@code buildPredicate} and used to map the
 *     encoded target value back to its string value for classification leaves
 * @param nodeIDCounts lookup from MLlib node ID to a count, used as each node's record count
 * @return the PMML tree model rooted at node "r"
 */
private TreeModel toTreeModel(DecisionTreeModel dtModel,
                              CategoricalValueEncodings categoricalValueEncodings,
                              IntLongMap nodeIDCounts) {

  // The model's task type must agree with what the input schema declares
  boolean classificationTask = dtModel.algo().equals(Algo.Classification());
  Preconditions.checkState(classificationTask == inputSchema.isClassification());

  Node root = new ComplexNode();
  root.setId("r");

  // modelNodes and treeNodes are always appended to and removed from in the same order,
  // so each remove() pair below refers to the same logical tree position
  Queue<Node> modelNodes = new ArrayDeque<>();
  modelNodes.add(root);

  Queue<Pair<org.apache.spark.mllib.tree.model.Node,Split>> treeNodes = new ArrayDeque<>();
  treeNodes.add(new Pair<>(dtModel.topNode(), null));

  while (!treeNodes.isEmpty()) {

    Pair<org.apache.spark.mllib.tree.model.Node,Split> treeNodePredicate = treeNodes.remove();
    Node modelNode = modelNodes.remove();

    // This is the decision that got us here from the parent, if any;
    // not the predicate at this node
    Predicate predicate = buildPredicate(treeNodePredicate.getSecond(),
                                         categoricalValueEncodings);
    modelNode.setPredicate(predicate);

    org.apache.spark.mllib.tree.model.Node treeNode = treeNodePredicate.getFirst();
    long nodeCount = nodeIDCounts.get(treeNode.id());
    modelNode.setRecordCount((double) nodeCount);

    if (treeNode.isLeaf()) {

      Predict prediction = treeNode.predict();
      if (classificationTask) {
        // For classification the predicted value is an encoded category index,
        // so narrowing it to int is intended here (and only here)
        int targetEncodedValue = (int) prediction.predict();
        Map<Integer,String> targetEncodingToValue =
            categoricalValueEncodings.getEncodingValueMap(inputSchema.getTargetFeatureIndex());
        double predictedProbability = prediction.prob();
        Preconditions.checkState(predictedProbability >= 0.0 && predictedProbability <= 1.0);
        // Not sure how nodeCount == 0 can happen but it does in the MLlib model
        long effectiveNodeCount = Math.max(1, nodeCount);
        // Problem: MLlib only gives a predicted class and its probability, and no distribution
        // over the rest. Infer that the rest of the probability is evenly distributed.
        double restProbability = (1.0 - predictedProbability) / (targetEncodingToValue.size() - 1);

        targetEncodingToValue.forEach((encodedValue, value) -> {
          double probability = encodedValue == targetEncodedValue ? predictedProbability : restProbability;
          // Yes, recordCount may be fractional; it's a relative indicator
          double recordCount = probability * effectiveNodeCount;
          if (recordCount > 0.0) {
            ScoreDistribution distribution = new ScoreDistribution(value, recordCount);
            // Not "confident" enough in the "probability" to call it one
            distribution.setConfidence(probability);
            modelNode.addScoreDistributions(distribution);
          }
        });
      } else {
        // Regression: keep the full double prediction. Casting it to int first would
        // silently drop the fractional part of the score (e.g. 2.7 written as "2.0").
        modelNode.setScore(Double.toString(prediction.predict()));
      }

    } else {

      Split split = treeNode.split().get();

      Node positiveModelNode = new ComplexNode().setId(modelNode.getId() + "+");
      Node negativeModelNode = new ComplexNode().setId(modelNode.getId() + "-");
      modelNode.addNodes(positiveModelNode, negativeModelNode);

      org.apache.spark.mllib.tree.model.Node rightTreeNode = treeNode.rightNode().get();
      org.apache.spark.mllib.tree.model.Node leftTreeNode = treeNode.leftNode().get();

      // The child with the strictly larger count in nodeIDCounts becomes the default child;
      // ties go to the negative (left) child
      boolean defaultRight = nodeIDCounts.get(rightTreeNode.id()) > nodeIDCounts.get(leftTreeNode.id());
      modelNode.setDefaultChild(defaultRight ? positiveModelNode.getId() : negativeModelNode.getId());

      // Right node is "positive", so carries the predicate. It must evaluate first
      // and therefore come first in the tree
      modelNodes.add(positiveModelNode);
      modelNodes.add(negativeModelNode);
      treeNodes.add(new Pair<>(rightTreeNode, split));
      treeNodes.add(new Pair<>(leftTreeNode, null));

    }

  }

  return new TreeModel()
      .setNode(root)
      .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
      .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);
}
 
Example 4
Source File: GolfingTreeModelExample.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Builds the "golfing" sample document: a classification tree over temperature, humidity,
 * windy and outlook that scores one of "will play", "may play" or "no play".
 *
 * <p>The root has two subtrees, added in this order: the "sunny" branch and the
 * "overcast or rain" branch.
 *
 * @return a PMML 4.4 document containing the single tree model named "golfing"
 */
@Override
public PMML produce(){
	FieldName temperature = FieldName.create("temperature");
	FieldName humidity = FieldName.create("humidity");
	FieldName windy = FieldName.create("windy");
	FieldName outlook = FieldName.create("outlook");
	FieldName activity = FieldName.create("whatIDo");

	Header header = new Header();
	header.setCopyright("www.dmg.org");
	header.setDescription("A very small binary tree model to show structure.");

	// Data dictionary: two continuous inputs, two categorical inputs, one categorical target
	DataDictionary dataDictionary = new DataDictionary();

	DataField temperatureField = new DataField(temperature, OpType.CONTINUOUS, DataType.DOUBLE);
	DataField humidityField = new DataField(humidity, OpType.CONTINUOUS, DataType.DOUBLE);
	DataField windyField = new DataField(windy, OpType.CATEGORICAL, DataType.STRING)
		.addValues(createValues("true", "false"));
	DataField outlookField = new DataField(outlook, OpType.CATEGORICAL, DataType.STRING)
		.addValues(createValues("sunny", "overcast", "rain"));
	DataField activityField = new DataField(activity, OpType.CATEGORICAL, DataType.STRING)
		.addValues(createValues("will play", "may play", "no play"));

	dataDictionary.addDataFields(temperatureField, humidityField, windyField, outlookField, activityField);
	dataDictionary.setNumberOfFields((dataDictionary.getDataFields()).size());

	// Mining schema: every field participates; only the activity field is flagged as the target
	MiningSchema miningSchema = new MiningSchema();

	MiningField activityMiningField = new MiningField(activity)
		.setUsageType(MiningField.UsageType.TARGET);

	miningSchema.addMiningFields(
		new MiningField(temperature),
		new MiningField(humidity),
		new MiningField(windy),
		new MiningField(outlook),
		activityMiningField
	);

	Node root = new BranchNode("will play", True.INSTANCE);

	// Upper half of the tree: outlook == sunny
	Node sunnyBranch = new BranchNode("will play", new SimplePredicate(outlook, Operator.EQUAL, "sunny"));

	Node moderateTemperatureBranch = new BranchNode("will play",
		createCompoundPredicate(BooleanOperator.AND,
			new SimplePredicate(temperature, Operator.LESS_THAN, "90"),
			new SimplePredicate(temperature, Operator.GREATER_THAN, "50"))
		);
	moderateTemperatureBranch.addNodes(
		new LeafNode("will play", new SimplePredicate(humidity, Operator.LESS_THAN, "80")),
		new LeafNode("no play", new SimplePredicate(humidity, Operator.GREATER_OR_EQUAL, "80"))
	);

	Node extremeTemperatureLeaf = new LeafNode("no play",
		createCompoundPredicate(BooleanOperator.OR,
			new SimplePredicate(temperature, Operator.GREATER_OR_EQUAL, "90"),
			new SimplePredicate(temperature, Operator.LESS_OR_EQUAL, "50"))
		);

	sunnyBranch.addNodes(moderateTemperatureBranch, extremeTemperatureLeaf);
	root.addNodes(sunnyBranch);

	// Lower half of the tree: outlook == overcast or outlook == rain
	Node overcastOrRainBranch = new BranchNode("may play",
		createCompoundPredicate(BooleanOperator.OR,
			new SimplePredicate(outlook, Operator.EQUAL, "overcast"),
			new SimplePredicate(outlook, Operator.EQUAL, "rain"))
		);

	Node mayPlayLeaf = new LeafNode("may play",
		createCompoundPredicate(BooleanOperator.AND,
			new SimplePredicate(temperature, Operator.GREATER_THAN, "60"),
			new SimplePredicate(temperature, Operator.LESS_THAN, "100"),
			new SimplePredicate(outlook, Operator.EQUAL, "overcast"),
			new SimplePredicate(humidity, Operator.LESS_THAN, "70"),
			new SimplePredicate(windy, Operator.EQUAL, "false"))
		);

	Node rainLeaf = new LeafNode("no play",
		createCompoundPredicate(BooleanOperator.AND,
			new SimplePredicate(outlook, Operator.EQUAL, "rain"),
			new SimplePredicate(humidity, Operator.LESS_THAN, "70"))
		);

	overcastOrRainBranch.addNodes(mayPlayLeaf, rainLeaf);
	root.addNodes(overcastOrRainBranch);

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, miningSchema, root);
	treeModel.setModelName("golfing");

	PMML pmml = new PMML(Version.PMML_4_4.getVersion(), header, dataDictionary);
	pmml.addModels(treeModel);

	return pmml;
}
 
Example 5
Source File: HasNodeRegistryTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Exercises {@code getPath} and {@code getPathBetween} on a three-level tree:
 * the root has two branch children; the first has two branch children of its own,
 * the second has two leaf children.
 */
@Test
public void getPath(){
	// Level 1
	Node root = new BranchNode();

	// Level 2
	Node firstBranch = new BranchNode();
	Node secondBranch = new BranchNode();

	root.addNodes(firstBranch, secondBranch);

	// Level 3, below the first branch
	Node firstBranchChildA = new BranchNode();
	Node firstBranchChildB = new BranchNode();

	firstBranch.addNodes(firstBranchChildA, firstBranchChildB);

	// Level 3, below the second branch
	Node secondBranchLeafA = new LeafNode();
	Node secondBranchLeafB = new LeafNode();

	secondBranch.addNodes(secondBranchLeafA, secondBranchLeafB);

	TreeModel treeModel = new TreeModel(MiningFunction.REGRESSION, new MiningSchema(), root);

	PMML pmml = new PMML(Version.PMML_4_3.getVersion(), new Header(), new DataDictionary())
		.addModels(treeModel);

	HasNodeRegistry hasNodeRegistry = new TreeModelEvaluator(pmml);

	// Inverted registry: look up the identifier assigned to each node instance
	BiMap<Node, String> idsByNode = hasNodeRegistry.getEntityRegistry().inverse();

	String rootId = idsByNode.get(root);

	String firstBranchId = idsByNode.get(firstBranch);
	String secondBranchId = idsByNode.get(secondBranch);

	String firstBranchChildAId = idsByNode.get(firstBranchChildA);
	String firstBranchChildBId = idsByNode.get(firstBranchChildB);
	String secondBranchLeafAId = idsByNode.get(secondBranchLeafA);
	String secondBranchLeafBId = idsByNode.get(secondBranchLeafB);

	// Paths from the root down to a node
	assertEquals(Arrays.asList(root), hasNodeRegistry.getPath(rootId));
	assertEquals(Arrays.asList(root, firstBranch), hasNodeRegistry.getPath(firstBranchId));
	assertEquals(Arrays.asList(root, firstBranch, firstBranchChildA), hasNodeRegistry.getPath(firstBranchChildAId));

	// Paths between an ancestor and a descendant; the reverse direction yields null
	assertEquals(Arrays.asList(root), hasNodeRegistry.getPathBetween(rootId, rootId));
	assertEquals(Arrays.asList(root, firstBranch), hasNodeRegistry.getPathBetween(rootId, firstBranchId));
	assertNull(hasNodeRegistry.getPathBetween(firstBranchId, rootId));
	assertEquals(Arrays.asList(firstBranch, firstBranchChildA), hasNodeRegistry.getPathBetween(firstBranchId, firstBranchChildAId));
	assertEquals(Arrays.asList(firstBranch, firstBranchChildB), hasNodeRegistry.getPathBetween(firstBranchId, firstBranchChildBId));

	// Nodes outside the first branch's subtree are unreachable from it
	assertNull(hasNodeRegistry.getPathBetween(firstBranchId, secondBranchId));
	assertNull(hasNodeRegistry.getPathBetween(firstBranchId, secondBranchLeafAId));
	assertNull(hasNodeRegistry.getPathBetween(firstBranchId, secondBranchLeafBId));
}
 
Example 6
Source File: TreePathFinderTest.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 3 votes vote down vote up
/**
 * Checks that {@code TreePathFinder} reports exactly one root-to-leaf path per leaf node
 * for a tree with three leaves at depths 2, 3 and 4.
 */
@Test
public void find(){
	Node root = new BranchNode();

	// Children of the root: one leaf followed by two branches
	Node shallowLeaf = new LeafNode();
	Node deepBranch = new BranchNode();
	Node middleBranch = new BranchNode();

	root.addNodes(shallowLeaf, deepBranch, middleBranch);

	Node deepInnerBranch = new BranchNode();
	Node middleLeaf = new LeafNode();

	deepBranch.addNodes(deepInnerBranch);
	middleBranch.addNodes(middleLeaf);

	Node deepLeaf = new LeafNode();

	deepInnerBranch.addNodes(deepLeaf);

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, new MiningSchema(), root);

	TreePathFinder pathFinder = new TreePathFinder();
	pathFinder.applyTo(treeModel);

	Map<Node, List<Node>> pathsByLeaf = pathFinder.getPaths();

	// One entry per leaf
	assertEquals(3, pathsByLeaf.size());

	assertEquals(Arrays.asList(root, shallowLeaf), pathsByLeaf.get(shallowLeaf));
	assertEquals(Arrays.asList(root, deepBranch, deepInnerBranch, deepLeaf), pathsByLeaf.get(deepLeaf));
	assertEquals(Arrays.asList(root, middleBranch, middleLeaf), pathsByLeaf.get(middleLeaf));
}
 
Example 7
Source File: ArrayListTransformerTest.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 3 votes vote down vote up
/**
 * Checks that {@code ArrayListTransformer} swaps the child-node lists of a tree for
 * {@code DoubletonList} / {@code SingletonList} instances, while the value of a
 * {@code ComplexArray} inside a predicate is still an {@code ArrayList} and a
 * {@code ComplexValue} afterwards.
 */
@Test
public void transform(){
	Node root = new BranchNode();

	Node innerBranch = new BranchNode();
	Node siblingLeaf = new LeafNode();

	root.addNodes(innerBranch, siblingLeaf);

	Array intArray = new ComplexArray()
		.setType(Array.Type.INT)
		.setValue(Arrays.asList(-1, 1));

	Predicate setPredicate = new SimpleSetPredicate(FieldName.create("x"), SimpleSetPredicate.BooleanOperator.IS_IN, intArray);

	Node predicateLeaf = new LeafNode(null, setPredicate);

	innerBranch.addNodes(predicateLeaf);

	// Before the transformation: plain ArrayLists everywhere
	assertTrue(root.getNodes() instanceof ArrayList);
	assertTrue(innerBranch.getNodes() instanceof ArrayList);

	Object arrayValue = intArray.getValue();

	assertTrue(arrayValue instanceof ArrayList);
	assertTrue(arrayValue instanceof ComplexValue);

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, new MiningSchema(), root);

	ArrayListTransformer arrayListTransformer = new ArrayListTransformer();
	arrayListTransformer.applyTo(treeModel);

	// After the transformation: two children -> DoubletonList, one child -> SingletonList
	assertTrue(root.getNodes() instanceof DoubletonList);
	assertTrue(innerBranch.getNodes() instanceof SingletonList);

	// The array value keeps both of its type characteristics
	arrayValue = intArray.getValue();

	assertTrue(arrayValue instanceof ArrayList);
	assertTrue(arrayValue instanceof ComplexValue);
}
 
Example 8
Source File: NodeScoreParserTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Applies {@code NodeScoreParser} followed by {@code FloatInterner} to a tree model with the
 * FLOAT math context, first as a classification model and then as a regression model.
 *
 * <p>The assertions expect scores to be unchanged in the classification pass. In the
 * regression pass the numeric scores are expected to become float values, with the three
 * second-level scores being the same instance, while the "error" score stays a string.
 */
@Test
public void parseAndIntern(){
	Node root = new BranchNode("1", True.INSTANCE);

	// Second level: string "2", string "2.0" and an actual float 2.0f
	Node stringIntLeaf = new LeafNode("2", False.INSTANCE);
	Node stringDecimalBranch = new BranchNode("2.0", False.INSTANCE);
	Node floatLeaf = new LeafNode(2.0f, True.INSTANCE);

	root.addNodes(stringIntLeaf, stringDecimalBranch, floatLeaf);

	// Third level: a score that is not a number
	Node nonNumericLeaf = new LeafNode("error", False.INSTANCE);

	stringDecimalBranch.addNodes(nonNumericLeaf);

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, new MiningSchema(), root)
		.setMathContext(MathContext.FLOAT);

	VisitorBattery visitors = new VisitorBattery();
	visitors.add(NodeScoreParser.class);
	visitors.add(FloatInterner.class);

	// Pass 1: classification
	visitors.applyTo(treeModel);

	assertEquals("1", root.getScore());

	assertEquals("2", stringIntLeaf.getScore());
	assertEquals("2.0", stringDecimalBranch.getScore());
	assertEquals(2.0f, floatLeaf.getScore());

	assertEquals("error", nonNumericLeaf.getScore());

	// Pass 2: regression
	treeModel.setMiningFunction(MiningFunction.REGRESSION);

	visitors.applyTo(treeModel);

	assertEquals(1.0f, root.getScore());

	assertEquals(2.0f, stringIntLeaf.getScore());
	assertEquals(2.0f, stringDecimalBranch.getScore());
	assertEquals(2.0f, floatLeaf.getScore());

	// Equal float scores must be the very same object
	assertSame(stringIntLeaf.getScore(), stringDecimalBranch.getScore());
	assertSame(stringIntLeaf.getScore(), floatLeaf.getScore());

	assertEquals("error", nonNumericLeaf.getScore());
}