org.jpmml.converter.CategoricalLabel Java Examples

The following examples show how to use org.jpmml.converter.CategoricalLabel. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Classification.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates the categorical target label of this classification objective.
 *
 * <p>
 * If no target categories are supplied, they are obtained from {@code LabelUtil.createTargetCategories(num_class_)}
 * and the target field is declared with the integer data type.
 * Otherwise the supplied categories are used as-is and the target field is declared with the string data type.
 * </p>
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories, or {@code null} to have them generated.
 * @param encoder The encoder that creates the target data field.
 *
 * @return A categorical label wrapping the newly created data field.
 *
 * @throws IllegalArgumentException If the number of supplied target categories differs from {@code num_class_}.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	final boolean generated = (targetCategories == null);

	if(generated){
		targetCategories = LabelUtil.createTargetCategories(this.num_class_);
	} else if(targetCategories.size() != this.num_class_){
		throw new IllegalArgumentException("Expected " + this.num_class_ + " target categories, got " + targetCategories.size() + " target categories");
	}

	// Generated categories are integers, user-supplied ones are treated as strings
	DataType dataType = (generated ? DataType.INTEGER : DataType.STRING);

	return new CategoricalLabel(encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories));
}
 
Example #2
Source File: BoostingConverter.java    From jpmml-r with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the boosting object as an ensemble of tree models
 * that are combined using the weighted majority vote method.
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The mining model, with a probability output attached.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector boosting = getObject();

	RGenericVector trees = boosting.getGenericElement("trees");
	RDoubleVector weights = boosting.getDoubleElement("weights");

	CategoricalLabel label = (CategoricalLabel)schema.getLabel();

	List<TreeModel> members = encodeTreeModels(trees);

	MiningModel ensemble = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(label));
	ensemble.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.WEIGHTED_MAJORITY_VOTE, members, weights.getValues()));
	ensemble.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, label));

	return ensemble;
}
 
Example #3
Source File: GeneralizedLinearRegressionModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Registers the output fields of the converted model.
 *
 * <p>
 * For the classification mining function, the output fields registered by the superclass
 * are copied into a new list, and one set of probability fields (as created by
 * {@code ModelUtil.createProbabilityFields}) is appended to it.
 * For every other mining function, the superclass result is returned unchanged.
 * </p>
 *
 * @param label The label; cast to {@link CategoricalLabel} for the classification mining function.
 * @param pmmlModel The PMML model.
 * @param encoder The encoder.
 *
 * @return The output fields.
 */
@Override
public List<OutputField> registerOutputFields(Label label, Model pmmlModel, SparkMLEncoder encoder){
	GeneralizedLinearRegressionModel model = getTransformer();

	List<OutputField> inheritedFields = super.registerOutputFields(label, pmmlModel, encoder);

	switch(getMiningFunction()){
		case CLASSIFICATION:
			{
				CategoricalLabel categoricalLabel = (CategoricalLabel)label;

				// Extend a copy rather than the list that was handed out by the superclass
				List<OutputField> extendedFields = new ArrayList<>(inheritedFields);
				extendedFields.addAll(ModelUtil.createProbabilityFields(DataType.DOUBLE, categoricalLabel.getValues()));

				return extendedFields;
			}
		default:
			return inheritedFields;
	}
}
 
Example #4
Source File: GBDTLRClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the GBDT + one-hot encoder + linear classifier pipeline
 * as a binary logistic classification model.
 *
 * <p>
 * The inner mining model is built against an anonymous schema and exposes its prediction
 * as a continuous double output field named {@code decisionFunction}.
 * </p>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel} and checked against size 2.
 *
 * @return The binary logistic classification model.
 */
@Override
public Model encodeModel(Schema schema){
	Classifier gbdt = getGBDT();
	MultiOneHotEncoder ohe = getOHE();
	LinearClassifier lr = getLR();

	CategoricalLabel binaryLabel = (CategoricalLabel)schema.getLabel();

	SchemaUtil.checkSize(2, binaryLabel);

	List<? extends Number> coef = lr.getCoef();
	List<? extends Number> intercept = lr.getIntercept();

	Schema segmentSchema = schema.toAnonymousSchema();

	// The intercept list is expected to hold exactly one element
	MiningModel decisionFunctionModel = GBDTUtil.encodeModel(gbdt, ohe, coef, Iterables.getOnlyElement(intercept), segmentSchema);
	decisionFunctionModel.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(decisionFunctionModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, lr.hasProbabilityDistribution(), schema);
}
 
Example #5
Source File: MultinomialLogisticRegression.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the trees as a softmax-normalized classification model.
 *
 * <p>
 * The trees are treated as a matrix with one row per target category.
 * Every row becomes a separate mining model, whose prediction is exposed
 * as a continuous double output field named {@code lgbmValue(<category>)}.
 * </p>
 *
 * @param trees The trees of all target categories.
 * @param numIteration The iteration limit; passed through to {@code createMiningModel}.
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The classification model.
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);

	List<MiningModel> categoryModels = new ArrayList<>();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int rows = categoricalLabel.size();
	int columns = (trees.size() / rows);

	for(int row = 0; row < rows; row++){
		MiningModel categoryModel = createMiningModel(FortranMatrixUtil.getRow(trees, rows, columns, row), numIteration, segmentSchema);
		categoryModel.setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue(" + categoricalLabel.getValue(row) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		categoryModels.add(categoryModel);
	}

	return MiningModelUtil.createClassification(categoryModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
 
Example #6
Source File: ScoreDistributionManager.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates one interned score distribution element per target category.
 *
 * @param categoricalLabel The label that supplies the target categories.
 * @param recordCounts The record counts; the element at index {@code i} is paired with the target category at index {@code i}.
 *
 * @return The score distribution elements, in the order of target categories.
 */
public List<ScoreDistribution> createScoreDistribution(CategoricalLabel categoricalLabel, double[] recordCounts){
	List<ScoreDistribution> scoreDistributions = new ArrayList<>();

	for(int index = 0, size = categoricalLabel.size(); index < size; index++){
		Object value = categoricalLabel.getValue(index);
		double recordCount = recordCounts[index];

		ScoreDistribution candidate = new InternableScoreDistribution()
			.setValue(value)
			.setRecordCount(recordCount);

		// Keep whatever instance intern(..) hands back, not the freshly built one
		scoreDistributions.add(intern(candidate));
	}

	return scoreDistributions;
}
 
Example #7
Source File: MultinomialLogisticRegression.java    From jpmml-xgboost with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the trees as a softmax-normalized classification model.
 *
 * <p>
 * The trees (and the optional weights) are treated as a matrix with one column per target category.
 * Every column becomes a separate mining model, whose prediction is exposed
 * as a continuous float output field named {@code xgbValue(<category>)}.
 * </p>
 *
 * @param trees The trees of all target categories.
 * @param weights The tree weights, or {@code null}.
 * @param base_score The base score; passed through to {@code createMiningModel}.
 * @param ntreeLimit The tree limit; passed through to {@code createMiningModel}.
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The classification model.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
	Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.FLOAT);

	List<MiningModel> categoryModels = new ArrayList<>();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int columns = categoricalLabel.size();
	int rows = (trees.size() / columns);

	for(int column = 0; column < columns; column++){
		MiningModel categoryModel = createMiningModel(CMatrixUtil.getColumn(trees, rows, columns, column), (weights != null) ? CMatrixUtil.getColumn(weights, rows, columns, column) : null, base_score, ntreeLimit, segmentSchema);
		categoryModel.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue(" + categoricalLabel.getValue(column) + ")"), OpType.CONTINUOUS, DataType.FLOAT));

		categoryModels.add(categoryModel);
	}

	return MiningModelUtil.createClassification(categoryModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
 
Example #8
Source File: Classification.java    From jpmml-xgboost with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates the categorical target label of this classification objective.
 *
 * <p>
 * If no target categories are supplied, they are obtained from {@code LabelUtil.createTargetCategories(num_class)}
 * and the target field is declared with the integer data type.
 * Otherwise the supplied categories are used as-is and the target field is declared with the string data type.
 * </p>
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories, or {@code null} to have them generated.
 * @param encoder The encoder that creates the target data field.
 *
 * @return A categorical label wrapping the newly created data field.
 *
 * @throws IllegalArgumentException If the number of supplied target categories differs from {@code num_class}.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	final boolean generated = (targetCategories == null);

	if(generated){
		targetCategories = LabelUtil.createTargetCategories(this.num_class);
	} else if(targetCategories.size() != this.num_class){
		throw new IllegalArgumentException("Expected " + this.num_class + " target categories, got " + targetCategories.size() + " target categories");
	}

	// Generated categories are integers, user-supplied ones are treated as strings
	DataType dataType = (generated ? DataType.INTEGER : DataType.STRING);

	return new CategoricalLabel(encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories));
}
 
Example #9
Source File: BaggingClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the bagging classifier as an ensemble model.
 *
 * <p>
 * The member models are combined by averaging if every estimator reports
 * having a probability distribution, and by majority vote otherwise.
 * </p>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The mining model, with a probability output attached.
 */
@Override
public MiningModel encodeModel(Schema schema){
	List<? extends Classifier> estimators = getEstimators();
	List<List<Integer>> estimatorsFeatures = getEstimatorsFeatures();

	// Stops at the first estimator that lacks a probability distribution
	boolean allProbabilistic = estimators.stream()
		.allMatch(estimator -> estimator.hasProbabilityDistribution());

	Segmentation.MultipleModelMethod multipleModelMethod = (allProbabilistic ? Segmentation.MultipleModelMethod.AVERAGE : Segmentation.MultipleModelMethod.MAJORITY_VOTE);

	MiningModel ensemble = BaggingUtil.encodeBagging(estimators, estimatorsFeatures, multipleModelMethod, MiningFunction.CLASSIFICATION, schema);
	ensemble.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)schema.getLabel()));

	return ensemble;
}
 
Example #10
Source File: TreeClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes this tree classifier as a classification-type tree model,
 * attaches a probability output to it, and hands it over to {@code TreeUtil.transform}.
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The transformed tree model.
 */
@Override
public TreeModel encodeModel(Schema schema){
	TreeModel classificationTree = TreeUtil.encodeTreeModel(this, MiningFunction.CLASSIFICATION, schema);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	classificationTree.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return TreeUtil.transform(this, classificationTree);
}
 
Example #11
Source File: PMMLPipeline.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Generates the names of probability fields, one per target category.
 *
 * @param categoricalLabel The label that supplies the target categories.
 *
 * @return Field names of the form {@code probability(<category>)}, in the order of target categories.
 */
private List<String> initProbabilityFields(CategoricalLabel categoricalLabel){
	List<String> names = new ArrayList<>();

	// XXX
	categoricalLabel.getValues()
		.forEach(category -> names.add("probability(" + category + ")"));

	return names;
}
 
Example #12
Source File: MLPClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes this multilayer perceptron classifier as a classification-type neural network
 * with a probability output attached.
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The neural network.
 */
@Override
public NeuralNetwork encodeModel(Schema schema){
	String activation = getActivation();

	List<? extends HasArray> coefs = getCoefs();
	List<? extends HasArray> intercepts = getIntercepts();

	NeuralNetwork classifierNetwork = MultilayerPerceptronUtil.encodeNeuralNetwork(MiningFunction.CLASSIFICATION, activation, coefs, intercepts, schema);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	classifierNetwork.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return classifierNetwork;
}
 
Example #13
Source File: GeneralizedLinearRegressionModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the generalized linear regression model as a PMML general regression model.
 *
 * <p>
 * For the classification mining function, the label is checked against size 2,
 * and the regression table is encoded for the second target category.
 * For every other mining function, the target category stays {@code null}.
 * </p>
 *
 * @param schema The schema that supplies the label and the features.
 *
 * @return The general regression model.
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	GeneralizedLinearRegressionModel model = getTransformer();

	MiningFunction miningFunction = getMiningFunction();

	Object targetCategory = null;

	switch(miningFunction){
		case CLASSIFICATION:
			{
				CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

				SchemaUtil.checkSize(2, categoricalLabel);

				targetCategory = categoricalLabel.getValue(1);
			}
			break;
		default:
			break;
	}

	// Mutable copies, because RegressionTableUtil.simplify(..) receives them for in-place editing - presumably; verify against its contract
	List<Feature> features = new ArrayList<>(schema.getFeatures());
	List<Double> featureCoefficients = new ArrayList<>(VectorUtil.toList(model.coefficients()));

	RegressionTableUtil.simplify(this, targetCategory, features, featureCoefficients);

	GeneralRegressionModel glm = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null);
	glm.setDistribution(parseFamily(model.getFamily()));
	glm.setLinkFunction(parseLinkFunction(model.getLink()));
	glm.setLinkParameter(parseLinkParameter(model.getLink()));

	GeneralRegressionModelUtil.encodeRegressionTable(glm, features, featureCoefficients, model.intercept(), targetCategory);

	return glm;
}
 
Example #14
Source File: ForestClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes this forest classifier as an averaging ensemble of classification models,
 * with a probability output attached.
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The mining model.
 */
@Override
public MiningModel encodeModel(Schema schema){
	MiningModel forest = ForestUtil.encodeBaseForest(this, Segmentation.MultipleModelMethod.AVERAGE, MiningFunction.CLASSIFICATION, schema);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	forest.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return forest;
}
 
Example #15
Source File: DummyClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes this dummy classifier as a single-node tree model.
 *
 * <p>
 * The root node always matches. Its score is the class selected by the strategy,
 * and its score distribution holds one probability per class:
 * </p>
 * <ul>
 *   <li>{@code constant} - the configured constant class gets probability 1, all other classes 0.</li>
 *   <li>{@code most_frequent} - the class with the largest prior gets probability 1, all other classes 0.</li>
 *   <li>{@code prior} - the class with the largest prior is the score, and the priors are used as probabilities.</li>
 * </ul>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The tree model, with a probability output attached.
 *
 * @throws IllegalArgumentException If the strategy is not supported,
 * or if the strategy is {@code constant} and the constant is not one of the classes.
 */
@Override
public TreeModel encodeModel(Schema schema){
	List<?> classes = getClasses();
	List<? extends Number> classPrior = getClassPrior();
	Object constant = getConstant();
	String strategy = getStrategy();

	ClassDictUtil.checkSize(classes, classPrior);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int index;

	double[] probabilities;

	switch(strategy){
		case "constant":
			{
				index = classes.indexOf(constant);

				// List#indexOf(Object) returns -1 for a missing element; fail with a clear message instead of an array index error
				if(index < 0){
					throw new IllegalArgumentException("Constant " + constant + " is not among classes " + classes);
				}

				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "most_frequent":
			{
				// The raw cast is needed, because Number itself is not Comparable
				index = classPrior.indexOf(Collections.max((List)classPrior));

				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "prior":
			{
				// The raw cast is needed, because Number itself is not Comparable
				index = classPrior.indexOf(Collections.max((List)classPrior));

				probabilities = Doubles.toArray(classPrior);
			}
			break;
		default:
			throw new IllegalArgumentException(strategy);
	}

	Node root = new ClassifierNode(ValueUtil.asString(classes.get(index)), True.INSTANCE);

	List<ScoreDistribution> scoreDistributions = root.getScoreDistributions();

	for(int i = 0; i < classes.size(); i++){
		ScoreDistribution scoreDistribution = new ScoreDistribution(ValueUtil.asString(classes.get(i)), probabilities[i]);

		scoreDistributions.add(scoreDistribution);
	}

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), root)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return treeModel;
}
 
Example #16
Source File: MiningModelUtil.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Combines per-category models into a classification-type model chain.
 *
 * <p>
 * A regression model with one regression table per target category is appended to the given models.
 * Each regression table refers to the prediction of the model at the same index, using the coefficient 1.
 * </p>
 *
 * @param models The per-category models; there must be as many models as there are target categories.
 * @param normalizationMethod The normalization method of the regression model; {@code null}, {@code NONE}, {@code SIMPLEMAX} or {@code SOFTMAX}.
 * @param hasProbabilityDistribution If {@code true}, a probability output is attached to the regression model.
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The model chain.
 *
 * @throws IllegalArgumentException If the number of models differs from the number of target categories,
 * if the normalization method is not supported, or if the models do not share the same math context.
 */
static
public MiningModel createClassification(List<? extends Model> models, RegressionModel.NormalizationMethod normalizationMethod, boolean hasProbabilityDistribution, Schema schema){
    CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

    // NOTE: the original author flagged this size check as "modified here"
    int numberOfCategories = categoricalLabel.size();
    if(numberOfCategories != models.size()){
        throw new IllegalArgumentException();
    }

    boolean supportedNormalizationMethod = (normalizationMethod == null)
        || (normalizationMethod == RegressionModel.NormalizationMethod.NONE)
        || (normalizationMethod == RegressionModel.NormalizationMethod.SIMPLEMAX)
        || (normalizationMethod == RegressionModel.NormalizationMethod.SOFTMAX);
    if(!supportedNormalizationMethod){
        throw new IllegalArgumentException();
    }

    // The math context that is shared by all models; fixed by the first model
    MathContext sharedMathContext = null;

    List<RegressionTable> regressionTables = new ArrayList<>();

    for(int i = 0; i < numberOfCategories; i++){
        Model model = models.get(i);

        MathContext modelMathContext = model.getMathContext();
        if(modelMathContext == null){
            modelMathContext = MathContext.DOUBLE;
        }

        if(sharedMathContext == null){
            sharedMathContext = modelMathContext;
        } else if(!Objects.equals(sharedMathContext, modelMathContext)){
            throw new IllegalArgumentException();
        }

        Feature feature = MODEL_PREDICTION.apply(model);

        RegressionTable regressionTable = RegressionModelUtil.createRegressionTable(Collections.singletonList(feature), Collections.singletonList(1d), null);
        regressionTable.setTargetCategory(categoricalLabel.getValue(i));

        regressionTables.add(regressionTable);
    }

    RegressionModel regressionModel = new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), regressionTables);
    regressionModel.setNormalizationMethod(normalizationMethod);
    regressionModel.setMathContext(ModelUtil.simplifyMathContext(sharedMathContext));
    regressionModel.setOutput(hasProbabilityDistribution ? ModelUtil.createProbabilityOutput(sharedMathContext, categoricalLabel) : null);

    List<Model> chainedModels = new ArrayList<>(models);
    chainedModels.add(regressionModel);

    return createModelChain(chainedModels, schema);
}
 
Example #17
Source File: GBMConverter.java    From jpmml-r with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the tree models as a softmax-normalized classification model.
 *
 * <p>
 * The tree models are treated as a matrix with one column per target category.
 * Every column becomes a separate mining model, whose prediction is exposed
 * as a continuous double output field named {@code gbmValue(<category>)}.
 * </p>
 *
 * @param treeModels The tree models of all target categories.
 * @param initF The initial value; passed through to {@code createMiningModel}.
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The classification model.
 */
private MiningModel encodeMultinomialClassification(List<TreeModel> treeModels, Double initF, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);

	List<Model> categoryModels = new ArrayList<>();

	int columns = categoricalLabel.size();
	int rows = (treeModels.size() / columns);

	for(int column = 0; column < columns; column++){
		MiningModel categoryModel = createMiningModel(CMatrixUtil.getColumn(treeModels, rows, columns, column), initF, segmentSchema);
		categoryModel.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue(" + categoricalLabel.getValue(column) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		categoryModels.add(categoryModel);
	}

	return MiningModelUtil.createClassification(categoryModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
 
Example #18
Source File: BaggingConverter.java    From jpmml-r with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the bagging object as an ensemble of tree models
 * that are combined using the majority vote method.
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The mining model, with a probability output attached.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector bagging = getObject();

	RGenericVector trees = bagging.getGenericElement("trees");

	CategoricalLabel label = (CategoricalLabel)schema.getLabel();

	List<TreeModel> members = encodeTreeModels(trees);

	MiningModel ensemble = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(label));
	ensemble.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.MAJORITY_VOTE, members));
	ensemble.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, label));

	return ensemble;
}
 
Example #19
Source File: DNNClassifier.java    From jpmml-tensorflow with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the DNN classifier as a classification-type neural network.
 *
 * <p>
 * The shape of the last neural layer decides the target categories:
 * </p>
 * <ul>
 *   <li>One neuron - the layer is switched to the logistic activation function,
 *   and a new two-neuron identity layer is appended, which yields the categories {@code "0"} and {@code "1"}.</li>
 *   <li>Three or more neurons - the layer is switched to the identity activation function with softmax normalization,
 *   and the categories are the neuron indices {@code "0"}, {@code "1"}, ...</li>
 *   <li>Any other neuron count - rejected.</li>
 * </ul>
 *
 * @param encoder The encoder that creates the target field named {@code _target}.
 *
 * @return The neural network, with neural outputs and a float probability output attached.
 *
 * @throws IllegalArgumentException If the last neural layer has neither one nor more than two neurons.
 */
@Override
public NeuralNetwork encodeModel(TensorFlowEncoder encoder){
	// The target field is created without categories; they are supplied further below via encoder.toCategorical(..)
	DataField dataField = encoder.createDataField(FieldName.create("_target"), OpType.CATEGORICAL, DataType.INTEGER);

	NeuralNetwork neuralNetwork = encodeNeuralNetwork(encoder);

	List<NeuralLayer> neuralLayers = neuralNetwork.getNeuralLayers();

	NeuralLayer neuralLayer = Iterables.getLast(neuralLayers);

	List<Neuron> neurons = neuralLayer.getNeurons();

	List<String> categories;

	if(neurons.size() == 1){
		neuralLayer.setActivationFunction(NeuralNetwork.ActivationFunction.LOGISTIC);

		Neuron neuron = Iterables.getOnlyElement(neurons);

		// From here on, neuralLayer refers to the new layer, not to the original last layer
		neuralLayer = new NeuralLayer()
			.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY);

		categories = Arrays.asList("0", "1");

		// The neuron identifiers below use neuralLayers.size() + 1, which is read before the new layer is added to the network

		// p(no event) = 1 - p(event)
		Neuron passiveNeuron = new Neuron()
			.setId(String.valueOf(neuralLayers.size() + 1) + "/" + categories.get(0))
			.setBias(ValueUtil.floatToDouble(1f))
			.addConnections(new Connection(neuron.getId(), -1f));

		// p(event)
		Neuron activeNeuron = new Neuron()
			.setId(String.valueOf(neuralLayers.size() + 1) + "/" + categories.get(1))
			.setBias(null)
			.addConnections(new Connection(neuron.getId(), 1f));

		neuralLayer.addNeurons(passiveNeuron, activeNeuron);

		neuralNetwork.addNeuralLayers(neuralLayer);

		// The neural outputs must be connected to the neurons of the new layer
		neurons = neuralLayer.getNeurons();
	} else

	if(neurons.size() > 2){
		neuralLayer
			.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY)
			.setNormalizationMethod(NeuralNetwork.NormalizationMethod.SOFTMAX);

		categories = new ArrayList<>();

		// One category per neuron, named after the neuron index
		for(int i = 0; i < neurons.size(); i++){
			String category = String.valueOf(i);

			categories.add(category);
		}
	} else

	// Zero or two neurons
	{
		throw new IllegalArgumentException();
	}

	dataField = encoder.toCategorical(dataField.getName(), categories);

	CategoricalLabel categoricalLabel = new CategoricalLabel(dataField);

	neuralNetwork
		.setMiningFunction(MiningFunction.CLASSIFICATION)
		.setMiningSchema(ModelUtil.createMiningSchema(categoricalLabel))
		.setNeuralOutputs(NeuralNetworkUtil.createClassificationNeuralOutputs(neurons, categoricalLabel))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.FLOAT, categoricalLabel));

	return neuralNetwork;
}
 
Example #20
Source File: GLMConverter.java    From jpmml-r with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the GLM object as a PMML general regression model.
 *
 * <p>
 * The mining function is derived from the family name.
 * For the classification mining function, the label is checked against size 2,
 * the regression table is encoded for the second target category,
 * and a probability output is attached to the model.
 * </p>
 *
 * @param schema The schema that supplies the label and the features.
 *
 * @return The general regression model.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector glm = getObject();

	RDoubleVector coefficients = glm.getDoubleElement("coefficients");
	RGenericVector family = glm.getGenericElement("family");

	Double intercept = coefficients.getElement(getInterceptName(), false);

	RStringVector familyFamily = family.getStringElement("family");
	RStringVector familyLink = family.getStringElement("link");

	Label label = schema.getLabel();
	List<? extends Feature> features = schema.getFeatures();

	// The intercept, if present, is not backed by a feature
	int numberOfFeatureCoefficients = coefficients.size() - (intercept != null ? 1 : 0);

	SchemaUtil.checkSize(numberOfFeatureCoefficients, features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	MiningFunction miningFunction = getMiningFunction(familyFamily.asScalar());

	Object targetCategory = null;

	switch(miningFunction){
		case CLASSIFICATION:
			{
				CategoricalLabel categoricalLabel = (CategoricalLabel)label;

				SchemaUtil.checkSize(2, categoricalLabel);

				targetCategory = categoricalLabel.getValue(1);
			}
			break;
		default:
			break;
	}

	GeneralRegressionModel regression = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(label), null, null, null);
	regression.setDistribution(parseFamily(familyFamily.asScalar()));
	regression.setLinkFunction(parseLinkFunction(familyLink.asScalar()));
	regression.setLinkParameter(parseLinkParameter(familyLink.asScalar()));

	GeneralRegressionModelUtil.encodeRegressionTable(regression, features, featureCoefficients, intercept, targetCategory);

	// The mining function cannot be null here, because the switch statement above would have failed
	if(miningFunction == MiningFunction.CLASSIFICATION){
		regression.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)label));
	}

	return regression;
}
 
Example #21
Source File: RandomForestConverter.java    From jpmml-r with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the forest object as an ensemble of classification-type tree models
 * that are combined using the majority vote method.
 *
 * <p>
 * The {@code nrnodes} element is used as the number of rows and the {@code ntree} element as the number of columns
 * when slicing the per-tree data out of the {@code treemap}, {@code nodepred}, {@code bestvar} and {@code xbestsplit} elements.
 * </p>
 *
 * @param forest The forest object.
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The mining model, with a probability output attached.
 */
private MiningModel encodeClassification(RGenericVector forest, Schema schema){
	RNumberVector<?> bestvar = forest.getNumericElement("bestvar");
	RNumberVector<?> treemap = forest.getNumericElement("treemap");
	RIntegerVector nodepred = forest.getIntegerElement("nodepred");
	RDoubleVector xbestsplit = forest.getDoubleElement("xbestsplit");
	RIntegerVector nrnodes = forest.getIntegerElement("nrnodes");
	RDoubleVector ntree = forest.getDoubleElement("ntree");

	int rows = nrnodes.asScalar();
	int columns = ValueUtil.asInt(ntree.asScalar());

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// Translates a node prediction to a target category.
	// The value is shifted by one - presumably because node predictions are one-based category indices; TODO confirm
	ScoreEncoder<Integer> scoreEncoder = new ScoreEncoder<Integer>(){

		@Override
		public Object encode(Integer value){
			return categoricalLabel.getValue(value - 1);
		}
	};

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> treeModels = new ArrayList<>();

	// One tree model per column
	for(int i = 0; i < columns; i++){
		// The tree map holds 2 * rows values per tree; they are split into two columns of rows values each below.
		// NOTE(review): the two columns look like the left and right daughter node indices - confirm against encodeTreeModel
		List<? extends Number> daughters = FortranMatrixUtil.getColumn(treemap.getValues(), 2 * rows, columns, i);

		TreeModel treeModel = encodeTreeModel(
				MiningFunction.CLASSIFICATION,
				scoreEncoder,
				FortranMatrixUtil.getColumn(daughters, rows, 2, 0),
				FortranMatrixUtil.getColumn(daughters, rows, 2, 1),
				FortranMatrixUtil.getColumn(nodepred.getValues(), rows, columns, i),
				FortranMatrixUtil.getColumn(bestvar.getValues(), rows, columns, i),
				FortranMatrixUtil.getColumn(xbestsplit.getValues(), rows, columns, i),
				segmentSchema
			);

		treeModels.add(treeModel);
	}

	MiningModel miningModel = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel))
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.MAJORITY_VOTE, treeModels))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return miningModel;
}
 
Example #22
Source File: LinearClassifier.java    From jpmml-tensorflow with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the linear classifier as a softmax-normalized classification-type regression model.
 *
 * <p>
 * The number of regression tables decides the target categories:
 * </p>
 * <ul>
 *   <li>One regression table - it is assigned to the category {@code "1"},
 *   and a new regression table with the intercept 0 is added for the category {@code "0"}.</li>
 *   <li>Three or more regression tables - each one is assigned to the category that is named after its index.</li>
 *   <li>Any other number of regression tables - rejected.</li>
 * </ul>
 *
 * @param encoder The encoder that creates the target field named {@code _target}.
 *
 * @return The regression model, with a float probability output attached.
 *
 * @throws IllegalArgumentException If there is neither one nor more than two regression tables.
 */
@Override
public RegressionModel encodeModel(TensorFlowEncoder encoder){
	DataField dataField = encoder.createDataField(FieldName.create("_target"), OpType.CATEGORICAL, DataType.INTEGER);

	RegressionModel regressionModel = encodeRegressionModel(encoder);

	List<RegressionTable> regressionTables = regressionModel.getRegressionTables();

	final int numberOfTables = regressionTables.size();

	List<String> categories;

	if(numberOfTables == 1){
		categories = Arrays.asList("0", "1");

		// The existing regression table describes the event
		regressionTables.get(0)
			.setTargetCategory(categories.get(1));

		// The no-event regression table has nothing but a zero intercept
		RegressionTable passiveRegressionTable = new RegressionTable(0)
			.setTargetCategory(categories.get(0));

		regressionModel.addRegressionTables(passiveRegressionTable);
	} else if(numberOfTables > 2){
		categories = new ArrayList<>();

		for(int index = 0; index < numberOfTables; index++){
			String category = String.valueOf(index);

			regressionTables.get(index).setTargetCategory(category);

			categories.add(category);
		}
	} else {
		throw new IllegalArgumentException();
	}

	dataField = encoder.toCategorical(dataField.getName(), categories);

	CategoricalLabel categoricalLabel = new CategoricalLabel(dataField);

	regressionModel.setMiningFunction(MiningFunction.CLASSIFICATION);
	regressionModel.setNormalizationMethod(RegressionModel.NormalizationMethod.SOFTMAX);
	regressionModel.setMiningSchema(ModelUtil.createMiningSchema(categoricalLabel));
	regressionModel.setOutput(ModelUtil.createProbabilityOutput(DataType.FLOAT, categoricalLabel));

	return regressionModel;
}
 
Example #23
Source File: LinearDiscriminantAnalysis.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the multinomial form of this model.
 *
 * <p>
 * For scikit-learn versions older than 0.21 (or an unknown version), the encoding is delegated to the superclass.
 * Otherwise, one regression model is created per target category from the matching row of the coefficient matrix,
 * each exposing a continuous double output field named {@code decisionFunction(<category>)},
 * and the regression models are combined into a softmax-normalized classification model.
 * </p>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The model.
 *
 * @throws IllegalArgumentException If the version check passes, but there are fewer than three classes.
 */
private Model encodeMultinomialModel(Schema schema){
	String sklearnVersion = getSkLearnVersion();
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	// See https://github.com/scikit-learn/scikit-learn/issues/6848
	boolean legacy = (sklearnVersion == null || SkLearnUtil.compareVersion(sklearnVersion, "0.21") < 0);
	if(legacy){
		return super.encodeModel(schema);
	}

	if(numberOfClasses < 3){
		throw new IllegalArgumentException();
	}

	SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

	Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

	List<RegressionModel> categoryModels = new ArrayList<>();

	for(int row = 0, rows = categoricalLabel.size(); row < rows; row++){
		RegressionModel categoryModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, row), intercept.get(row), RegressionModel.NormalizationMethod.NONE, segmentSchema);
		categoryModel.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(row) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		categoryModels.add(categoryModel);
	}

	return MiningModelUtil.createClassification(categoryModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
 
Example #24
Source File: LinearModelUtil.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates a binary logistic classification model.
 *
 * <p>
 * The model is a regression model by default.
 * It is a logit-linked general regression model (encoded for the second target category) instead,
 * if the converter's representation option equals the simple name of the {@link GeneralRegressionModel} class, ignoring case.
 * </p>
 *
 * @param converter The converter that supplies the regression table options.
 * @param coefficients The feature coefficients.
 * @param intercept The intercept.
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The model.
 */
static
public <C extends ModelConverter<?> & HasRegressionTableOptions> Model createBinaryLogisticClassification(C converter, Vector coefficients, double intercept, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	String representation = (String)converter.getOption(HasRegressionTableOptions.OPTION_REPRESENTATION, null);

	List<Feature> features = new ArrayList<>(schema.getFeatures());
	List<Double> featureCoefficients = new ArrayList<>(VectorUtil.toList(coefficients));

	RegressionTableUtil.simplify(converter, null, features, featureCoefficients);

	// String#equalsIgnoreCase(String) evaluates to false for a null argument, so a missing option selects the default representation
	boolean generalRegression = (GeneralRegressionModel.class.getSimpleName()).equalsIgnoreCase(representation);
	if(!generalRegression){
		return RegressionModelUtil.createBinaryLogisticClassification(features, featureCoefficients, intercept, RegressionModel.NormalizationMethod.LOGIT, true, schema);
	}

	Object targetCategory = categoricalLabel.getValue(1);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), null, null, null);
	generalRegressionModel.setLinkFunction(GeneralRegressionModel.LinkFunction.LOGIT);

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, targetCategory);

	return generalRegressionModel;
}
 
Example #25
Source File: MultilayerPerceptronUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes a multilayer perceptron as a neural network.
 *
 * <p>
 * One neural layer is created per coefficient matrix, and one neuron per column of that matrix.
 * The neurons of the first layer are fed by the neural inputs; the neurons of every following layer
 * are fed by the neurons of the layer before it.
 * The last layer always uses the identity activation function. For the classification mining function, it is additionally
 * followed by binary logistic transformation layers (two target categories)
 * or marked with the softmax normalization method (more than two target categories).
 * </p>
 *
 * @param miningFunction The mining function.
 * @param activation The name of the activation function of the neural network.
 * @param coefs The coefficient matrices, one per layer.
 * @param intercepts The intercept vectors, one per layer.
 * @param schema The schema that supplies the label and the features.
 *
 * @return The neural network. The neural outputs are set for the regression and classification mining functions only.
 *
 * @throws IllegalArgumentException If the mining function is classification, but the label has fewer than two target categories.
 */
static
public NeuralNetwork encodeNeuralNetwork(MiningFunction miningFunction, String activation, List<? extends HasArray> coefs, List<? extends HasArray> intercepts, Schema schema){
	NeuralNetwork.ActivationFunction activationFunction = parseActivationFunction(activation);

	ClassDictUtil.checkSize(coefs, intercepts);

	Label label = schema.getLabel();
	List<? extends Feature> features = schema.getFeatures();

	NeuralInputs neuralInputs = NeuralNetworkUtil.createNeuralInputs(features, DataType.DOUBLE);

	// The entities that feed the layer that is currently being built; re-pointed after every layer
	List<? extends NeuralEntity> entities = neuralInputs.getNeuralInputs();

	List<NeuralLayer> neuralLayers = new ArrayList<>();

	for(int layer = 0; layer < coefs.size(); layer++){
		HasArray coef = coefs.get(layer);
		HasArray intercept = intercepts.get(layer);

		int[] shape = coef.getArrayShape();

		int rows = shape[0];
		int columns = shape[1];

		NeuralLayer neuralLayer = new NeuralLayer();

		List<?> coefMatrix = coef.getArrayContent();
		List<?> interceptVector = intercept.getArrayContent();

		// One neuron per column; the column holds the connection weights, the intercept vector holds the bias
		for(int column = 0; column < columns; column++){
			List<? extends Number> weights = (List)CMatrixUtil.getColumn(coefMatrix, rows, columns, column);
			Number bias = (Number)interceptVector.get(column);

			// Neuron identifiers have the form <one-based layer>/<one-based column>
			Neuron neuron = NeuralNetworkUtil.createNeuron(entities, weights, bias)
				.setId(String.valueOf(layer + 1) + "/" + String.valueOf(column + 1));

			neuralLayer.addNeurons(neuron);
		}

		neuralLayers.add(neuralLayer);

		entities = neuralLayer.getNeurons();

		// Last layer
		if(layer == (coefs.size() - 1)){
			neuralLayer.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY);

			switch(miningFunction){
				case REGRESSION:
					break;
				case CLASSIFICATION:
					CategoricalLabel categoricalLabel = (CategoricalLabel)label;

					// Binary classification
					if(categoricalLabel.size() == 2){
						// The last layer is expected to hold exactly one neuron here
						List<NeuralLayer> transformationNeuralLayers = NeuralNetworkUtil.createBinaryLogisticTransformation(Iterables.getOnlyElement(entities));

						neuralLayers.addAll(transformationNeuralLayers);

						neuralLayer = Iterables.getLast(transformationNeuralLayers);

						// The neural outputs must be connected to the neurons of the last transformation layer
						entities = neuralLayer.getNeurons();
					} else

					// Multi-class classification
					if(categoricalLabel.size() > 2){
						neuralLayer.setNormalizationMethod(NeuralNetwork.NormalizationMethod.SOFTMAX);
					} else

					{
						throw new IllegalArgumentException();
					}
					break;
				default:
					break;
			}
		}
	}

	// Stays null for mining functions other than regression and classification
	NeuralOutputs neuralOutputs = null;

	switch(miningFunction){
		case REGRESSION:
			neuralOutputs = NeuralNetworkUtil.createRegressionNeuralOutputs(entities, (ContinuousLabel)label);
			break;
		case CLASSIFICATION:
			neuralOutputs = NeuralNetworkUtil.createClassificationNeuralOutputs(entities, (CategoricalLabel)label);
			break;
		default:
			break;
	}

	NeuralNetwork neuralNetwork = new NeuralNetwork(miningFunction, activationFunction, ModelUtil.createMiningSchema(label), neuralInputs, neuralLayers)
		.setNeuralOutputs(neuralOutputs);

	return neuralNetwork;
}
 
Example #26
Source File: VotingClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes this voting classifier as a segmented classification mining model.
 *
 * <p>
 * Every estimator returned by {@code getEstimators()} is encoded against the given schema,
 * and the resulting models become the segments of a single {@link MiningModel}.
 * The way the segments are combined is chosen by {@code parseVoting} from the voting mode
 * and from whether a non-empty list of weights is available.
 * </p>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return A classification mining model with a probability output.
 */
@Override
public Model encodeModel(Schema schema){
	List<? extends Classifier> members = getEstimators();
	List<? extends Number> memberWeights = getWeights();

	CategoricalLabel targetLabel = (CategoricalLabel)schema.getLabel();

	// One segment model per member estimator, in estimator order
	List<Model> segmentModels = new ArrayList<>();

	for(int i = 0, size = members.size(); i < size; i++){
		Classifier member = members.get(i);

		segmentModels.add(member.encodeModel(schema));
	}

	String votingMode = getVoting();

	// Weights only count when they are present and non-empty
	boolean weighted = (memberWeights != null) && !memberWeights.isEmpty();

	Segmentation.MultipleModelMethod combinationMethod = parseVoting(votingMode, weighted);

	return new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(targetLabel))
		.setSegmentation(MiningModelUtil.createSegmentation(combinationMethod, segmentModels, memberWeights))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, targetLabel));
}
 
Example #27
Source File: GaussianNB.java    From jpmml-sklearn with GNU Affero General Public License v3.0 3 votes vote down vote up
/**
 * Encodes this Gaussian naive Bayes estimator as a PMML naive Bayes model.
 *
 * <p>
 * The theta and sigma lists are read column by column (one column per feature) using the
 * shape reported by {@code getThetaShape()}; each column pair is turned into the target
 * value statistics of one {@link BayesInput}.
 * </p>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}, and its features
 * are converted to continuous features.
 *
 * @return A classification naive Bayes model with a probability output.
 */
@Override
public NaiveBayesModel encodeModel(Schema schema){
	int[] thetaShape = getThetaShape();

	int rows = thetaShape[0];
	int columns = thetaShape[1];

	List<? extends Number> thetaValues = getTheta();
	List<? extends Number> sigmaValues = getSigma();

	CategoricalLabel targetLabel = (CategoricalLabel)schema.getLabel();

	BayesInputs inputs = new BayesInputs();

	int featureIndex = 0;

	while(featureIndex < columns){
		Feature feature = schema.getFeature(featureIndex);

		// Per-category statistics of the current feature
		List<? extends Number> columnMeans = CMatrixUtil.getColumn(thetaValues, rows, columns, featureIndex);
		List<? extends Number> columnVariances = CMatrixUtil.getColumn(sigmaValues, rows, columns, featureIndex);

		ContinuousFeature numericFeature = feature.toContinuousFeature();

		inputs.addBayesInputs(new BayesInput(numericFeature.getName(), encodeTargetValueStats(targetLabel.getValues(), columnMeans, columnVariances), null));

		featureIndex++;
	}

	List<Integer> countsPerClass = getClassCount();

	BayesOutput output = new BayesOutput(targetLabel.getName(), null)
		.setTargetValueCounts(encodeTargetValueCounts(targetLabel.getValues(), countsPerClass));

	return new NaiveBayesModel(0d, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(targetLabel), inputs, output)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, targetLabel));
}
 
Example #28
Source File: LRMConverter.java    From jpmml-r with GNU Affero General Public License v3.0 3 votes vote down vote up
/**
 * Encodes the wrapped R object as a general regression model with a logit link function.
 *
 * <p>
 * The label must have exactly two categories; the regression table is encoded for the
 * category at index 1. The number of features must equal the number of coefficients,
 * not counting the intercept when one is present.
 * </p>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return A classification general regression model with a probability output.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector model = getObject();

	RDoubleVector coefs = model.getDoubleElement("coefficients");

	CategoricalLabel targetLabel = (CategoricalLabel)schema.getLabel();

	SchemaUtil.checkSize(2, targetLabel);

	Object secondCategory = targetLabel.getValue(1);

	// May be null; the size check below only subtracts one when it is present
	Double interceptValue = coefs.getElement(getInterceptName(), false);

	List<? extends Feature> features = schema.getFeatures();

	int expectedFeatureCount = coefs.size();

	if(interceptValue != null){
		expectedFeatureCount -= 1;
	}

	SchemaUtil.checkSize(expectedFeatureCount, features);

	List<Double> featureCoefs = getFeatureCoefficients(features, coefs);

	GeneralRegressionModel logitModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(targetLabel), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.LOGIT)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, targetLabel));

	GeneralRegressionModelUtil.encodeRegressionTable(logitModel, features, featureCoefs, interceptValue, secondCategory);

	return logitModel;
}
 
Example #29
Source File: LinearModelUtil.java    From jpmml-sparkml with GNU Affero General Public License v3.0 3 votes vote down vote up
/**
 * Creates a softmax-normalized classification regression model with one regression table
 * per target category.
 *
 * <p>
 * Row {@code i} of the coefficient matrix and element {@code i} of the intercept vector
 * are used for the target category at index {@code i}. Each table works on its own copy of
 * the schema features and coefficients, which are passed through
 * {@code RegressionTableUtil.simplify} before the table is created.
 * </p>
 *
 * @param converter The converter handed to {@code RegressionTableUtil.simplify}.
 * @param coefficients The coefficient matrix; its row count is checked against the label size.
 * @param intercepts The intercepts, indexed by target category.
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return A classification regression model using softmax normalization.
 */
static
public <C extends ModelConverter<?> & HasRegressionTableOptions> Model createSoftmaxClassification(C converter, Matrix coefficients, Vector intercepts, Schema schema){
	CategoricalLabel targetLabel = (CategoricalLabel)schema.getLabel();

	MatrixUtil.checkRows(targetLabel.size(), coefficients);

	List<RegressionTable> tables = new ArrayList<>();

	int row = 0;

	while(row < targetLabel.size()){
		Object category = targetLabel.getValue(row);

		// Fresh mutable copies for every category
		List<Feature> rowFeatures = new ArrayList<>(schema.getFeatures());
		List<Double> rowCoefficients = new ArrayList<>(MatrixUtil.getRow(coefficients, row));

		RegressionTableUtil.simplify(converter, category, rowFeatures, rowCoefficients);

		RegressionTable table = RegressionModelUtil.createRegressionTable(rowFeatures, rowCoefficients, intercepts.apply(row))
			.setTargetCategory(category);

		tables.add(table);

		row++;
	}

	return new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(targetLabel), tables)
		.setNormalizationMethod(RegressionModel.NormalizationMethod.SOFTMAX);
}
 
Example #30
Source File: LinearClassifier.java    From jpmml-sklearn with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Encodes this linear classifier as a PMML model.
 *
 * <p>
 * The first element of the coefficient shape selects the encoding:
 * </p>
 * <ul>
 *   <li>One row: a binary logistic classification; the label must have exactly two categories.</li>
 *   <li>Three or more rows: one logit-normalized regression model per target category
 *   (each exposing a {@code decisionFunction(<category>)} output field), combined into a
 *   simplemax-normalized classification; the label size must equal the row count.</li>
 * </ul>
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel}.
 *
 * @return The encoded classification model.
 *
 * @throws IllegalArgumentException If the coefficient matrix has neither exactly one row nor three or more rows.
 */
@Override
public Model encodeModel(Schema schema){
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	boolean hasProbabilityDistribution = hasProbabilityDistribution();

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	// Binary classification: a single coefficient row
	if(numberOfClasses == 1){
		SchemaUtil.checkSize(2, categoricalLabel);

		return RegressionModelUtil.createBinaryLogisticClassification(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, 0), intercept.get(0), RegressionModel.NormalizationMethod.LOGIT, hasProbabilityDistribution, schema);
	} else

	// Multi-class classification: one coefficient row per target category
	if(numberOfClasses >= 3){
		SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

		Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

		List<RegressionModel> regressionModels = new ArrayList<>();

		for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){
			RegressionModel regressionModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i), intercept.get(i), RegressionModel.NormalizationMethod.LOGIT, segmentSchema)
				.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

			regressionModels.add(regressionModel);
		}

		return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SIMPLEMAX, hasProbabilityDistribution, schema);
	} else

	{
		// Zero or two rows: report the offending row count instead of failing silently
		throw new IllegalArgumentException("Expected 1 or 3+ coefficient rows, got " + numberOfClasses + " coefficient rows");
	}
}