org.jpmml.converter.Schema Java Examples
The following examples show how to use
org.jpmml.converter.Schema.
You can go to the original project or source file by following the link above each example.
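Across all of these converters the Schema argument plays the same role: it bundles the target field (label) with the list of active features, so that an encodeModel(Schema) method only has to supply the model parameters themselves. The sketch below illustrates that pattern. It is not taken from any of the projects listed here; the class ExampleLinearConverter and its getCoefficients()/getIntercept() helpers are hypothetical placeholders, and the import paths assume a recent jpmml-converter release. The Schema accessors and RegressionModelUtil.createRegression(..) are the same calls that appear in Example #21 below.

import java.util.Arrays;
import java.util.List;

import org.dmg.pmml.regression.RegressionModel;
import org.jpmml.converter.Schema;
import org.jpmml.converter.regression.RegressionModelUtil;

// Illustrative sketch only; not part of any JPMML project shown on this page
public class ExampleLinearConverter {

    public RegressionModel encodeModel(Schema schema){
        // Hypothetical fitted parameters; a real converter reads them from the model object.
        // This sketch assumes the schema declares exactly two features.
        List<? extends Number> coefficients = getCoefficients();
        Number intercept = getIntercept();

        // schema.getFeatures() enumerates the input fields; the helper pairs each feature
        // with its coefficient and derives the MiningSchema from the schema's label
        // (same call as in Example #21)
        return RegressionModelUtil.createRegression(schema.getFeatures(), coefficients, intercept, null, schema);
    }

    private List<? extends Number> getCoefficients(){
        return Arrays.asList(0.5d, -1.25d);
    }

    private Number getIntercept(){
        return 3d;
    }
}

The converters shown below follow this same template, differing mainly in how they read the fitted parameters and in which model element (TreeModel, MiningModel, SupportVectorMachineModel, ClusteringModel, and so on) they emit.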
Example #1
Source File: AdaConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
@Override
public RPartConverter createConverter(RGenericVector rpart){
    return new RPartConverter(rpart){

        @Override
        public boolean hasScoreDistribution(){
            return false;
        }

        @Override
        public TreeModel encodeModel(Schema schema){
            TreeModel treeModel = super.encodeModel(schema)
                .setMiningFunction(MiningFunction.REGRESSION);

            return treeModel;
        }
    };
}
Example #2
Source File: BaggingClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
@Override
public MiningModel encodeModel(Schema schema){
    List<? extends Classifier> estimators = getEstimators();
    List<List<Integer>> estimatorsFeatures = getEstimatorsFeatures();

    Segmentation.MultipleModelMethod multipleModelMethod = Segmentation.MultipleModelMethod.AVERAGE;

    for(Classifier estimator : estimators){

        if(!estimator.hasProbabilityDistribution()){
            multipleModelMethod = Segmentation.MultipleModelMethod.MAJORITY_VOTE;

            break;
        }
    }

    MiningModel miningModel = BaggingUtil.encodeBagging(estimators, estimatorsFeatures, multipleModelMethod, MiningFunction.CLASSIFICATION, schema)
        .setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)schema.getLabel()));

    return miningModel;
}
Example #3
Source File: LibSVMRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
@Override
public SupportVectorMachineModel encodeModel(Schema schema){
    int[] shape = getSupportVectorsShape();

    int numberOfVectors = shape[0];
    int numberOfFeatures = shape[1];

    List<Integer> support = getSupport();
    List<? extends Number> supportVectors = getSupportVectors();
    List<? extends Number> dualCoef = getDualCoef();
    List<? extends Number> intercept = getIntercept();

    Kernel kernel = SupportVectorMachineUtil.createKernel(getKernel(), getDegree(), getGamma(), getCoef0());

    return LibSVMUtil.createRegression(kernel, new CMatrix<>(supportVectors, numberOfVectors, numberOfFeatures), SupportVectorMachineUtil.formatIds(support), Iterables.getOnlyElement(intercept), dualCoef, schema);
}
Example #4
Source File: RandomForestConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
private <P extends Number> TreeModel encodeTreeModel(MiningFunction miningFunction, ScoreEncoder<P> scoreEncoder, List<? extends Number> leftDaughter, List<? extends Number> rightDaughter, List<P> nodepred, List<? extends Number> bestvar, List<Double> xbestsplit, Schema schema){
    RGenericVector randomForest = getObject();

    Node root = encodeNode(True.INSTANCE, 0, scoreEncoder, leftDaughter, rightDaughter, bestvar, xbestsplit, nodepred, new CategoryManager(), schema);

    TreeModel treeModel = new TreeModel(miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), root)
        .setMissingValueStrategy(TreeModel.MissingValueStrategy.NULL_PREDICTION)
        .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT);

    if(this.compact){
        Visitor visitor = new RandomForestCompactor();

        visitor.applyTo(treeModel);
    }

    return treeModel;
}
Example #5
Source File: AdaConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
@Override
public Model encodeModel(Schema schema){
    RGenericVector ada = getObject();

    RGenericVector model = ada.getGenericElement("model");

    RGenericVector trees = model.getGenericElement("trees");
    RDoubleVector alpha = model.getDoubleElement("alpha");

    List<TreeModel> treeModels = encodeTreeModels(trees);

    MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(null))
        .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.WEIGHTED_SUM, treeModels, alpha.getValues()))
        .setOutput(ModelUtil.createPredictedOutput(FieldName.create("adaValue"), OpType.CONTINUOUS, DataType.DOUBLE));

    return MiningModelUtil.createBinaryLogisticClassification(miningModel, 2d, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
Example #6
Source File: RangerConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
@Override
public MiningModel encodeModel(Schema schema){
    RGenericVector ranger = getObject();

    RStringVector treetype = ranger.getStringElement("treetype");

    switch(treetype.asScalar()){
        case "Regression":
            return encodeRegression(ranger, schema);
        case "Classification":
            return encodeClassification(ranger, schema);
        case "Probability estimation":
            return encodeProbabilityForest(ranger, schema);
        default:
            throw new IllegalArgumentException();
    }
}
Example #7
Source File: GBDTLRClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
@Override
public Model encodeModel(Schema schema){
    Classifier gbdt = getGBDT();
    MultiOneHotEncoder ohe = getOHE();
    LinearClassifier lr = getLR();

    CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

    SchemaUtil.checkSize(2, categoricalLabel);

    List<? extends Number> coef = lr.getCoef();
    List<? extends Number> intercept = lr.getIntercept();

    Schema segmentSchema = schema.toAnonymousSchema();

    MiningModel miningModel = GBDTUtil.encodeModel(gbdt, ohe, coef, Iterables.getOnlyElement(intercept), segmentSchema)
        .setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE));

    return MiningModelUtil.createBinaryLogisticClassification(miningModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, lr.hasProbabilityDistribution(), schema);
}
Example #8
Source File: MiningModelUtil.java From pyramid with Apache License 2.0 | 6 votes |
static public MiningModel createModelChain(List<? extends Model> models, Schema schema){

    if(models.size() < 1){
        throw new IllegalArgumentException();
    }

    Segmentation segmentation = createSegmentation(Segmentation.MultipleModelMethod.MODEL_CHAIN, models);

    Model lastModel = Iterables.getLast(models);

    MiningModel miningModel = new MiningModel(lastModel.getMiningFunction(), ModelUtil.createMiningSchema(schema.getLabel()))
        .setMathContext(ModelUtil.simplifyMathContext(lastModel.getMathContext()))
        .setSegmentation(segmentation);

    return miningModel;
}
Example #9
Source File: VotingRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
@Override
public Model encodeModel(Schema schema){
    List<? extends Regressor> estimators = getEstimators();
    List<? extends Number> weights = getWeights();

    List<Model> models = new ArrayList<>();

    for(Regressor estimator : estimators){
        Model model = estimator.encodeModel(schema);

        models.add(model);
    }

    Segmentation.MultipleModelMethod multipleModelMethod = (weights != null && weights.size() > 0 ? Segmentation.MultipleModelMethod.WEIGHTED_AVERAGE : Segmentation.MultipleModelMethod.AVERAGE);

    MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()))
        .setSegmentation(MiningModelUtil.createSegmentation(multipleModelMethod, models, weights));

    return miningModel;
}
Example #10
Source File: GLMNetConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
@Override
public Model encodeModel(Schema schema){
    RGenericVector glmnet = getObject();

    RDoubleVector a0 = glmnet.getDoubleElement("a0");
    RExp beta = glmnet.getElement("beta");
    RDoubleVector lambda = glmnet.getDoubleElement("lambda");

    Double lambdaS = getLambdaS();
    if(lambdaS == null){
        lambdaS = loadLambdaS();
    }

    int column = (lambda.getValues()).indexOf(lambdaS);
    if(column < 0){
        throw new IllegalArgumentException();
    }

    return encodeModel(a0, beta, column, schema);
}
Example #11
Source File: LinearSVCModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 6 votes |
@Override
public MiningModel encodeModel(Schema schema){
    LinearSVCModel model = getTransformer();

    Transformation transformation = new AbstractTransformation(){

        @Override
        public Expression createExpression(FieldRef fieldRef){
            return PMMLUtil.createApply(PMMLFunctions.THRESHOLD)
                .addExpressions(fieldRef, PMMLUtil.createConstant(model.getThreshold()));
        }
    };

    Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);

    Model linearModel = LinearModelUtil.createRegression(this, model.coefficients(), model.intercept(), segmentSchema)
        .setOutput(ModelUtil.createPredictedOutput(FieldName.create("margin"), OpType.CONTINUOUS, DataType.DOUBLE, transformation));

    return MiningModelUtil.createBinaryLogisticClassification(linearModel, 1d, 0d, RegressionModel.NormalizationMethod.NONE, false, schema);
}
Example #12
Source File: AdaBoostRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
@Override
public MiningModel encodeModel(Schema schema){
    List<? extends Regressor> estimators = getEstimators();
    List<? extends Number> estimatorWeights = getEstimatorWeights();

    Schema segmentSchema = schema.toAnonymousSchema();

    List<Model> models = new ArrayList<>();

    for(Regressor estimator : estimators){
        Model model = estimator.encodeModel(segmentSchema);

        models.add(model);
    }

    MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()))
        .setSegmentation(MiningModelUtil.createSegmentation(MultipleModelMethod.WEIGHTED_MEDIAN, models, estimatorWeights));

    return miningModel;
}
Example #13
Source File: TreeUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
static public <E extends Estimator & HasTree> TreeModel encodeTreeModel(E estimator, PredicateManager predicateManager, ScoreDistributionManager scoreDistributionManager, MiningFunction miningFunction, Schema schema){
    Tree tree = estimator.getTree();

    int[] leftChildren = tree.getChildrenLeft();
    int[] rightChildren = tree.getChildrenRight();
    int[] features = tree.getFeature();
    double[] thresholds = tree.getThreshold();
    double[] values = tree.getValues();

    Node root = encodeNode(True.INSTANCE, predicateManager, scoreDistributionManager, 0, leftChildren, rightChildren, features, thresholds, values, miningFunction, schema);

    TreeModel treeModel = new TreeModel(miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), root)
        .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT);

    ClassDictUtil.clearContent(tree);

    return treeModel;
}
Example #14
Source File: RangerConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
private MiningModel encodeRegression(RGenericVector ranger, Schema schema){
    RGenericVector forest = ranger.getGenericElement("forest");

    ScoreEncoder scoreEncoder = new ScoreEncoder(){

        @Override
        public Node encode(Node node, Number splitValue, RNumberVector<?> terminalClassCount){
            node.setScore(splitValue);

            return node;
        }
    };

    List<TreeModel> treeModels = encodeForest(forest, MiningFunction.REGRESSION, scoreEncoder, schema);

    MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()))
        .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.AVERAGE, treeModels));

    return miningModel;
}
Example #15
Source File: TreePredictorUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
static public TreeModel encodeTreeModel(TreePredictor treePredictor, PredicateManager predicateManager, Schema schema){
    int[] leaf = treePredictor.isLeaf();
    int[] leftChildren = treePredictor.getLeft();
    int[] rightChildren = treePredictor.getRight();
    int[] featureIdx = treePredictor.getFeatureIdx();
    double[] thresholds = treePredictor.getThreshold();
    int[] missingGoToLeft = treePredictor.getMissingGoToLeft();
    double[] values = treePredictor.getValues();

    Node root = encodeNode(True.INSTANCE, predicateManager, 0, leaf, leftChildren, rightChildren, featureIdx, thresholds, missingGoToLeft, values, schema);

    TreeModel treeModel = new TreeModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), root)
        .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
        .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);

    return treeModel;
}
Example #16
Source File: RegTree.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 5 votes |
public TreeModel encodeTreeModel(PredicateManager predicateManager, Schema schema){
    org.dmg.pmml.tree.Node root = encodeNode(True.INSTANCE, predicateManager, 0, schema);

    TreeModel treeModel = new TreeModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), root)
        .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
        .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD)
        .setMathContext(MathContext.FLOAT);

    return treeModel;
}
Example #17
Source File: Learner.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 5 votes |
public MiningModel encodeMiningModel(Map<String, ?> options, Schema schema){
    Boolean compact = (Boolean)options.get(HasXGBoostOptions.OPTION_COMPACT);
    Integer ntreeLimit = (Integer)options.get(HasXGBoostOptions.OPTION_NTREE_LIMIT);

    MiningModel miningModel = this.gbtree.encodeMiningModel(this.obj, this.base_score, ntreeLimit, schema)
        .setAlgorithmName("XGBoost (" + this.gbtree.getAlgorithmName() + ")");

    if((Boolean.TRUE).equals(compact)){
        Visitor visitor = new TreeModelCompactor();

        visitor.applyTo(miningModel);
    }

    return miningModel;
}
Example #18
Source File: KMeansConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
@Override
public Model encodeModel(Schema schema){
    RGenericVector kmeans = getObject();

    RDoubleVector centers = kmeans.getDoubleElement("centers");
    RIntegerVector size = kmeans.getIntegerElement("size");

    RIntegerVector centersDim = centers.dim();

    int rows = centersDim.getValue(0);
    int columns = centersDim.getValue(1);

    List<Cluster> clusters = new ArrayList<>();

    RStringVector rowNames = centers.dimnames(0);
    for(int i = 0; i < rowNames.size(); i++){
        Cluster cluster = new Cluster(PMMLUtil.createRealArray(FortranMatrixUtil.getRow(centers.getValues(), rows, columns, i)))
            .setId(String.valueOf(i + 1))
            .setName(rowNames.getValue(i))
            .setSize(size.getValue(i));

        clusters.add(cluster);
    }

    ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
        .setCompareFunction(CompareFunction.ABS_DIFF);

    ClusteringModel clusteringModel = new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, rows, ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters)
        .setOutput(ClusteringModelUtil.createOutput(FieldName.create("cluster"), DataType.DOUBLE, clusters));

    return clusteringModel;
}
Example #19
Source File: GradientBoostingUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
static public <E extends Estimator & HasEstimatorEnsemble<TreeRegressor> & HasTreeOptions> MiningModel encodeGradientBoosting(E estimator, Number initialPrediction, Number learningRate, Schema schema){
    ContinuousLabel continuousLabel = (ContinuousLabel)schema.getLabel();

    List<TreeModel> treeModels = TreeUtil.encodeTreeModelEnsemble(estimator, MiningFunction.REGRESSION, schema);

    MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(continuousLabel))
        .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, treeModels))
        .setTargets(ModelUtil.createRescaleTargets(learningRate, initialPrediction, continuousLabel));

    return TreeUtil.transform(estimator, miningModel);
}
Example #20
Source File: LinearRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
@Override
public RegressionModel encodeModel(Schema schema){
    List<? extends Number> coef = getCoef();
    List<? extends Number> intercept = getIntercept();

    return RegressionModelUtil.createRegression(schema.getFeatures(), coef, Iterables.getOnlyElement(intercept), null, schema);
}
Example #21
Source File: IForestConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
private TreeModel encodeTreeModel(RGenericVector trees, int index, Schema schema){
    RIntegerVector nrnodes = trees.getIntegerElement("nrnodes");
    RIntegerVector ntree = trees.getIntegerElement("ntree");
    RIntegerVector nodeStatus = trees.getIntegerElement("nodeStatus");
    RIntegerVector leftDaughter = trees.getIntegerElement("lDaughter");
    RIntegerVector rightDaughter = trees.getIntegerElement("rDaughter");
    RIntegerVector splitAtt = trees.getIntegerElement("splitAtt");
    RDoubleVector splitPoint = trees.getDoubleElement("splitPoint");
    RIntegerVector nSam = trees.getIntegerElement("nSam");

    int rows = nrnodes.asScalar();
    int columns = ntree.asScalar();

    Node root = encodeNode(
        True.INSTANCE,
        0,
        0,
        FortranMatrixUtil.getColumn(nodeStatus.getValues(), rows, columns, index),
        FortranMatrixUtil.getColumn(nSam.getValues(), rows, columns, index),
        FortranMatrixUtil.getColumn(leftDaughter.getValues(), rows, columns, index),
        FortranMatrixUtil.getColumn(rightDaughter.getValues(), rows, columns, index),
        FortranMatrixUtil.getColumn(splitAtt.getValues(), rows, columns, index),
        FortranMatrixUtil.getColumn(splitPoint.getValues(), rows, columns, index),
        schema
    );

    TreeModel treeModel = new TreeModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), root)
        .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT);

    return treeModel;
}
Example #22
Source File: BinaryTreeConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
private Node encodeScore(Node node, RDoubleVector probabilities, Schema schema){

    switch(this.miningFunction){
        case REGRESSION:
            return encodeRegressionScore(node, probabilities);
        case CLASSIFICATION:
            return encodeClassificationScore(node, probabilities, schema);
        default:
            throw new IllegalArgumentException();
    }
}
Example #23
Source File: BinaryTreeConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
private TreeModel encodeTreeModel(RGenericVector tree, Schema schema){
    Node root = encodeNode(True.INSTANCE, tree, schema);

    TreeModel treeModel = new TreeModel(this.miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), root)
        .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT);

    return treeModel;
}
Example #24
Source File: SVMConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
static private SupportVectorMachineModel encodeClassification(org.dmg.pmml.support_vector_machine.Kernel kernel, RDoubleVector sv, RIntegerVector nSv, RDoubleVector rho, RDoubleVector coefs, Schema schema){
    RStringVector rowNames = sv.dimnames(0);
    RStringVector columnNames = sv.dimnames(1);

    return LibSVMUtil.createClassification(kernel, new FortranMatrix<>(sv.getValues(), rowNames.size(), columnNames.size()), nSv.getValues(), rowNames.getValues(), rho.getValues(), Lists.transform(coefs.getValues(), SVMConverter.FUNCTION_NEGATE), schema);
}
Example #25
Source File: LogisticRegression.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 5 votes |
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
    Schema segmentSchema = schema.toAnonymousSchema();

    MiningModel miningModel = createMiningModel(trees, weights, base_score, ntreeLimit, segmentSchema)
        .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

    return MiningModelUtil.createRegression(miningModel, RegressionModel.NormalizationMethod.LOGIT, schema);
}
Example #26
Source File: BinomialLogisticRegression.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 5 votes |
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
    Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.FLOAT);

    MiningModel miningModel = createMiningModel(trees, weights, base_score, ntreeLimit, segmentSchema)
        .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

    return MiningModelUtil.createBinaryLogisticClassification(miningModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
Example #27
Source File: GBDTLMRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
@Override
public Model encodeModel(Schema schema){
    Regressor gbdt = getGBDT();
    MultiOneHotEncoder ohe = getOHE();
    LinearRegressor lm = getLM();

    List<? extends Number> coef = lm.getCoef();
    List<? extends Number> intercept = lm.getIntercept();

    return GBDTUtil.encodeModel(gbdt, ohe, coef, Iterables.getOnlyElement(intercept), schema);
}
Example #28
Source File: KNeighborsRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
@Override
public NearestNeighborModel encodeModel(Schema schema){
    int[] shape = getFitXShape();

    int numberOfInstances = shape[0];
    int numberOfFeatures = shape[1];

    NearestNeighborModel nearestNeighborModel = KNeighborsUtil.encodeNeighbors(this, MiningFunction.REGRESSION, numberOfInstances, numberOfFeatures, schema)
        .setContinuousScoringMethod(NearestNeighborModel.ContinuousScoringMethod.AVERAGE);

    return nearestNeighborModel;
}
Example #29
Source File: GeneralizedLinearRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
@Override
public RegressionModel encodeModel(Schema schema){
    RegressionModel regressionModel = super.encodeModel(schema)
        .setNormalizationMethod(RegressionModel.NormalizationMethod.EXP);

    return regressionModel;
}
Example #30
Source File: BaggingRegressor.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
@Override
public MiningModel encodeModel(Schema schema){
    List<? extends Regressor> estimators = getEstimators();
    List<List<Integer>> estimatorsFeatures = getEstimatorsFeatures();

    MiningModel miningModel = BaggingUtil.encodeBagging(estimators, estimatorsFeatures, Segmentation.MultipleModelMethod.AVERAGE, MiningFunction.REGRESSION, schema);

    return miningModel;
}