org.jpmml.converter.SchemaUtil Java Examples

The following examples show how to use org.jpmml.converter.SchemaUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: RegressionTableUtil.java From jpmml-sparkml with GNU Affero General Public License v3.0

6 votes

/**
 * Validates that there is one coefficient per feature and, if the converter
 * has a lookup threshold configured, invokes {@code createMapValues} for
 * every field name that is shared by at least that many {@link BinaryFeature}
 * instances.
 *
 * @param converter The converter whose {@code OPTION_LOOKUP_THRESHOLD} option is consulted.
 * @param identifier Passed through to {@code createMapValues} unchanged.
 * @param features The features; the size must match the size of {@code coefficients}.
 * @param coefficients The coefficients, one per feature.
 */
static
public <C extends ModelConverter<?> & HasRegressionTableOptions> void simplify(C converter, Object identifier, List<Feature> features, List<Double> coefficients){
	SchemaUtil.checkSize(coefficients.size(), features);

	Integer lookupThreshold = (Integer)converter.getOption(HasRegressionTableOptions.OPTION_LOOKUP_THRESHOLD, null);

	// No threshold configured - nothing to do
	if(lookupThreshold == null){
		return;
	}

	// Number of binary features per field name
	Map<FieldName, Long> binaryFeatureCounts = features.stream()
		.filter(BinaryFeature.class::isInstance)
		.map(BinaryFeature.class::cast)
		.collect(Collectors.groupingBy(BinaryFeature::getName, Collectors.counting()));

	for(Map.Entry<FieldName, Long> binaryFeatureCount : binaryFeatureCounts.entrySet()){

		if(binaryFeatureCount.getValue() >= lookupThreshold){
			createMapValues(binaryFeatureCount.getKey(), identifier, features, coefficients);
		}
	}
}

Example #2

Source File: GBDTLRClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0

6 votes

/**
 * Encodes the GBDT, one-hot encoder and linear classifier components as a
 * binary logistic classification model.
 *
 * The schema label must be a {@link CategoricalLabel} with exactly two values.
 * The intermediate mining model exposes its prediction as a continuous
 * double output field named {@code decisionFunction}.
 *
 * @param schema The schema of the model.
 * @return The binary logistic classification model.
 */
@Override
public Model encodeModel(Schema schema){
	Classifier gbdtClassifier = getGBDT();
	MultiOneHotEncoder oneHotEncoder = getOHE();
	LinearClassifier linearClassifier = getLR();

	// Binary classification only
	CategoricalLabel binaryLabel = (CategoricalLabel)schema.getLabel();
	SchemaUtil.checkSize(2, binaryLabel);

	List<? extends Number> coefficients = linearClassifier.getCoef();
	List<? extends Number> intercepts = linearClassifier.getIntercept();

	Schema anonymousSchema = schema.toAnonymousSchema();

	// The linear classifier must hold exactly one intercept
	MiningModel decisionFunctionModel = GBDTUtil.encodeModel(gbdtClassifier, oneHotEncoder, coefficients, Iterables.getOnlyElement(intercepts), anonymousSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(decisionFunctionModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, linearClassifier.hasProbabilityDistribution(), schema);
}

Example #3

Source File: GeneralizedLinearRegressionModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

5 votes

/**
 * Encodes the Spark ML generalized linear regression model as a PMML
 * {@link GeneralRegressionModel} of the generalized linear type.
 *
 * For classification, the label must be a {@link CategoricalLabel} with
 * exactly two values; the value at index 1 becomes the target category.
 * For any other mining function the target category stays {@code null}.
 *
 * @param schema The schema of the model.
 * @return The general regression model.
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	GeneralizedLinearRegressionModel glm = getTransformer();

	MiningFunction miningFunction = getMiningFunction();

	Object targetCategory = null;

	switch(miningFunction){
		case CLASSIFICATION:
			{
				CategoricalLabel binaryLabel = (CategoricalLabel)schema.getLabel();

				SchemaUtil.checkSize(2, binaryLabel);

				targetCategory = binaryLabel.getValue(1);
			}
			break;
		default:
			break;
	}

	// Mutable copies, because they are handed to RegressionTableUtil#simplify
	List<Feature> regressionFeatures = new ArrayList<>(schema.getFeatures());
	List<Double> regressionCoefficients = new ArrayList<>(VectorUtil.toList(glm.coefficients()));

	RegressionTableUtil.simplify(this, targetCategory, regressionFeatures, regressionCoefficients);

	GeneralRegressionModel pmmlModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setDistribution(parseFamily(glm.getFamily()))
		.setLinkFunction(parseLinkFunction(glm.getLink()))
		.setLinkParameter(parseLinkParameter(glm.getLink()));

	GeneralRegressionModelUtil.encodeRegressionTable(pmmlModel, regressionFeatures, regressionCoefficients, glm.intercept(), targetCategory);

	return pmmlModel;
}

Example #4

Source File: StandardScalerModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the standard scaler transformation, feature by feature.
 *
 * Depending on the {@code withMean} and {@code withStd} flags of the
 * transformer, every input feature is centered by subtracting its mean
 * and/or scaled by multiplying with the reciprocal of its standard deviation.
 * Features that need no adjustment (zero mean, unit standard deviation, or
 * both flags off) are passed through as-is.
 *
 * @param encoder The encoder that supplies the input features and receives the derived fields.
 * @return The output features, one per input feature, in the input order.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	StandardScalerModel transformer = getTransformer();

	Vector mean = transformer.mean();
	Vector std = transformer.std();

	boolean withMean = transformer.getWithMean();
	boolean withStd = transformer.getWithStd();

	List<Feature> features = encoder.getFeatures(transformer.getInputCol());

	// A statistics vector is only size-checked if it is actually going to be used
	if(withMean){
		SchemaUtil.checkSize(mean.size(), features);
	} // End if

	if(withStd){
		SchemaUtil.checkSize(std.size(), features);
	}

	List<Feature> result = new ArrayList<>();

	for(int i = 0, length = features.size(); i < length; i++){
		Feature feature = features.get(i);

		FieldName name = formatName(transformer, i, length);

		// Stays null unless a centering expression is built for this feature
		Expression expression = null;

		if(withMean){
			double meanValue = mean.apply(i);

			// Subtracting zero is skipped
			if(!ValueUtil.isZero(meanValue)){
				ContinuousFeature continuousFeature = feature.toContinuousFeature();

				expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, continuousFeature.ref(), PMMLUtil.createConstant(meanValue));
			}
		} // End if

		if(withStd){
			double stdValue = std.apply(i);

			// Multiplying by one is skipped
			if(!ValueUtil.isOne(stdValue)){
				// NOTE(review): a zero stdValue yields an infinite factor here - confirm that upstream rules this out
				Double factor = (1d / stdValue);

				if(expression != null){
					// Centered feature: scale the centering expression
					expression = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, expression, PMMLUtil.createConstant(factor));
				} else

				{
					// Scaling only: wrap the feature into a product feature,
					// which builds the multiplication expression when toContinuousFeature() is invoked
					feature = new ProductFeature(encoder, feature, factor){

						@Override
						public ContinuousFeature toContinuousFeature(){
							// These locals shadow the same-named locals of the enclosing method
							Supplier<Apply> applySupplier = () -> {
								Feature feature = getFeature();
								Number factor = getFactor();

								return PMMLUtil.createApply(PMMLFunctions.MULTIPLY, (feature.toContinuousFeature()).ref(), PMMLUtil.createConstant(factor));
							};

							return toContinuousFeature(name, DataType.DOUBLE, applySupplier);
						}
					};
				}
			}
		} // End if

		if(expression != null){
			// An expression was built: materialize it as a continuous double derived field
			DerivedField derivedField = encoder.createDerivedField(name, OpType.CONTINUOUS, DataType.DOUBLE, expression);

			result.add(new ContinuousFeature(encoder, derivedField));
		} else

		{
			// Either the untouched input feature, or its product feature wrapper
			result.add(feature);
		}
	}

	return result;
}

Example #5

Source File: VectorIndexerModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the vector indexer transformation, feature by feature.
 *
 * A feature that has a category map is made categorical and mapped to integer
 * values using a {@code MapValues} derived field. A feature without a category
 * map is cast to {@link ContinuousFeature} and passed through.
 *
 * @param encoder The encoder that supplies the input features and receives the derived fields.
 * @return The output features, one per input feature, in the input order.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	VectorIndexerModel transformer = getTransformer();

	int numFeatures = transformer.numFeatures();

	List<Feature> inputFeatures = encoder.getFeatures(transformer.getInputCol());
	SchemaUtil.checkSize(numFeatures, inputFeatures);

	Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();

	List<Feature> outputFeatures = new ArrayList<>();

	for(int index = 0; index < numFeatures; index++){
		Feature inputFeature = inputFeatures.get(index);

		Map<Double, Integer> categoryMap = categoryMaps.get(index);

		// No category map - the feature is passed through as continuous
		if(categoryMap == null){
			outputFeatures.add((ContinuousFeature)inputFeature);

			continue;
		}

		List<Map.Entry<Double, Integer>> sortedEntries = new ArrayList<>(categoryMap.entrySet());
		sortedEntries.sort(VectorIndexerModelConverter.COMPARATOR);

		// Parallel lists: the i-th category maps to the i-th value
		List<Double> categories = new ArrayList<>();
		List<Integer> values = new ArrayList<>();

		for(Map.Entry<Double, Integer> sortedEntry : sortedEntries){
			categories.add(sortedEntry.getKey());
			values.add(sortedEntry.getValue());
		}

		encoder.toCategorical(inputFeature.getName(), categories);

		MapValues mapValues = PMMLUtil.createMapValues(inputFeature.getName(), categories, values)
			.setDataType(DataType.INTEGER);

		DerivedField derivedField = encoder.createDerivedField(formatName(transformer, index, numFeatures), OpType.CATEGORICAL, DataType.INTEGER, mapValues);

		outputFeatures.add(new CategoricalFeature(encoder, derivedField, values));
	}

	return outputFeatures;
}

Example #6

Source File: LinearDiscriminantAnalysis.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes a multinomial (three or more classes) model as a mining model that
 * holds one regression model per class, normalized using softmax.
 *
 * If the scikit-learn version is unknown, or compares as lower than "0.21",
 * then the encoding is delegated to {@code super.encodeModel(schema)}.
 *
 * @param schema The schema of the model; its label must be a {@link CategoricalLabel}.
 * @return The encoded model.
 * @throws IllegalArgumentException If the coefficient matrix has fewer than three rows.
 */
private Model encodeMultinomialModel(Schema schema){
	String sklearnVersion = getSkLearnVersion();
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	// See https://github.com/scikit-learn/scikit-learn/issues/6848
	boolean corrected = (sklearnVersion != null && SkLearnUtil.compareVersion(sklearnVersion, "0.21") >= 0);

	if(!corrected){
		return super.encodeModel(schema);
	} // End if

	if(numberOfClasses < 3){
		// Report the offending value, so that the failure can be diagnosed from the stack trace alone
		throw new IllegalArgumentException("Expected a coefficient matrix with three or more rows (classes), got " + numberOfClasses);
	}

	SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

	Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

	List<RegressionModel> regressionModels = new ArrayList<>();

	// One regression model per class, each exposing its decision function value as an output field
	for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){
		RegressionModel regressionModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i), intercept.get(i), RegressionModel.NormalizationMethod.NONE, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		regressionModels.add(regressionModel);
	}

	return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}

Example #7

Source File: GLMConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the R {@code glm} object as a PMML {@link GeneralRegressionModel}
 * of the generalized linear type.
 *
 * The number of coefficients, less the intercept if there is one, must match
 * the number of schema features. For classification, the label must be a
 * {@link CategoricalLabel} with exactly two values; the value at index 1
 * becomes the target category, and a probability output is attached.
 *
 * @param schema The schema of the model.
 * @return The general regression model.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector glm = getObject();

	RDoubleVector coefficients = glm.getDoubleElement("coefficients");
	RGenericVector family = glm.getGenericElement("family");

	Double intercept = coefficients.getElement(getInterceptName(), false);

	RStringVector familyName = family.getStringElement("family");
	RStringVector linkName = family.getStringElement("link");

	Label label = schema.getLabel();
	List<? extends Feature> features = schema.getFeatures();

	// The intercept, if present, does not correspond to any feature
	int interceptCount = (intercept != null ? 1 : 0);

	SchemaUtil.checkSize(coefficients.size() - interceptCount, features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	MiningFunction miningFunction = getMiningFunction(familyName.asScalar());

	Object targetCategory = null;

	switch(miningFunction){
		case CLASSIFICATION:
			{
				CategoricalLabel binaryLabel = (CategoricalLabel)label;

				SchemaUtil.checkSize(2, binaryLabel);

				targetCategory = binaryLabel.getValue(1);
			}
			break;
		default:
			break;
	}

	GeneralRegressionModel pmmlModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(label), null, null, null)
		.setDistribution(parseFamily(familyName.asScalar()))
		.setLinkFunction(parseLinkFunction(linkName.asScalar()))
		.setLinkParameter(parseLinkParameter(linkName.asScalar()));

	GeneralRegressionModelUtil.encodeRegressionTable(pmmlModel, features, featureCoefficients, intercept, targetCategory);

	// Only classification models report probabilities
	switch(miningFunction){
		case CLASSIFICATION:
			{
				pmmlModel.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)label));
			}
			break;
		default:
			break;
	}

	return pmmlModel;
}

Example #8

Source File: LMConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the R {@code lm} object as a regression model.
 *
 * The number of coefficients, less the intercept if there is one, must match
 * the number of schema features.
 *
 * @param schema The schema of the model.
 * @return The regression model.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector lm = getObject();

	RDoubleVector coefficients = lm.getDoubleElement("coefficients");

	// May be null, in which case all coefficients belong to features
	Double intercept = coefficients.getElement(getInterceptName(), false);

	List<? extends Feature> features = schema.getFeatures();

	int interceptCount = (intercept != null ? 1 : 0);

	SchemaUtil.checkSize(coefficients.size() - interceptCount, features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	return RegressionModelUtil.createRegression(features, featureCoefficients, intercept, null, schema);
}

Example #9

Source File: EarthConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the R {@code earth} object as a PMML {@link GeneralRegressionModel}
 * of the generalized linear type, with the regression mining function and the
 * identity link function.
 *
 * The coefficient at index 0 is used as the intercept; the remaining
 * coefficients must match the schema features in number.
 *
 * @param schema The schema of the model.
 * @return The general regression model.
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	RGenericVector earth = getObject();

	RDoubleVector coefficients = earth.getDoubleElement("coefficients");

	// The leading coefficient is the intercept
	Double intercept = coefficients.getValue(0);

	List<? extends Feature> features = schema.getFeatures();

	SchemaUtil.checkSize(coefficients.size() - 1, features);

	// Everything after the intercept
	List<Double> featureCoefficients = (coefficients.getValues()).subList(1, features.size() + 1);

	GeneralRegressionModel pmmlModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.IDENTITY);

	GeneralRegressionModelUtil.encodeRegressionTable(pmmlModel, features, featureCoefficients, intercept, null);

	return pmmlModel;
}

Example #10

Source File: VectorSizeHintConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

3 votes

/**
 * Checks that the number of input features matches the size declared by the
 * vector size hint, and returns the input features unchanged.
 *
 * @param encoder The encoder that supplies the input features.
 * @return The input features.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	VectorSizeHint transformer = getTransformer();

	int expectedSize = transformer.getSize();

	List<Feature> inputFeatures = encoder.getFeatures(transformer.getInputCol());

	// Validation only - no new fields are created
	SchemaUtil.checkSize(expectedSize, inputFeatures);

	return inputFeatures;
}

Example #11

Source File: IDFModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

3 votes

/**
 * Encodes the IDF transformation by wrapping every input feature into a
 * {@link ProductFeature} that carries the corresponding IDF weight.
 *
 * The conversion to a continuous feature is deferred: on first use, the
 * wrapped feature is cast to {@code TermFeature} and converted to a weighted
 * term feature, which is then cached for subsequent uses.
 *
 * @param encoder The encoder that supplies the input features.
 * @return The product features, one per input feature, in the input order.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IDFModel transformer = getTransformer();

	Vector idf = transformer.idf();

	List<Feature> inputFeatures = encoder.getFeatures(transformer.getInputCol());
	SchemaUtil.checkSize(idf.size(), inputFeatures);

	List<Feature> outputFeatures = new ArrayList<>();

	for(int index = 0; index < inputFeatures.size(); index++){
		Feature inputFeature = inputFeatures.get(index);
		Double weight = idf.apply(index);

		outputFeatures.add(new ProductFeature(encoder, inputFeature, weight){

			// Created on first use
			private WeightedTermFeature cachedFeature = null;


			@Override
			public ContinuousFeature toContinuousFeature(){

				if(this.cachedFeature != null){
					return this.cachedFeature.toContinuousFeature();
				}

				TermFeature termFeature = (TermFeature)getFeature();
				Number factor = getFactor();

				this.cachedFeature = termFeature.toWeightedTermFeature(factor);

				return this.cachedFeature.toContinuousFeature();
			}
		});
	}

	return outputFeatures;
}

Example #12

Source File: LRMConverter.java From jpmml-r with GNU Affero General Public License v3.0

3 votes

/**
 * Encodes the R {@code lrm} object as a PMML {@link GeneralRegressionModel}
 * of the generalized linear type, with the classification mining function,
 * the logit link function and a probability output.
 *
 * The label must be a {@link CategoricalLabel} with exactly two values; the
 * value at index 1 becomes the target category. The number of coefficients,
 * less the intercept if there is one, must match the number of schema features.
 *
 * @param schema The schema of the model.
 * @return The general regression model.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector lrm = getObject();

	RDoubleVector coefficients = lrm.getDoubleElement("coefficients");

	// Binary classification only
	CategoricalLabel binaryLabel = (CategoricalLabel)schema.getLabel();
	SchemaUtil.checkSize(2, binaryLabel);

	Object targetCategory = binaryLabel.getValue(1);

	Double intercept = coefficients.getElement(getInterceptName(), false);

	List<? extends Feature> features = schema.getFeatures();

	// The intercept, if present, does not correspond to any feature
	int interceptCount = (intercept != null ? 1 : 0);

	SchemaUtil.checkSize(coefficients.size() - interceptCount, features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	GeneralRegressionModel pmmlModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(binaryLabel), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.LOGIT)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, binaryLabel));

	GeneralRegressionModelUtil.encodeRegressionTable(pmmlModel, features, featureCoefficients, intercept, targetCategory);

	return pmmlModel;
}

Example #13

Source File: LinearClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0

2 votes

/**
 * Encodes the linear classifier.
 *
 * A coefficient matrix with a single row is encoded as a binary logistic
 * classification model; the label must then have exactly two values.
 * A coefficient matrix with three or more rows is encoded as a mining model
 * that holds one logit-normalized regression model per class, combined
 * using simplemax normalization.
 *
 * @param schema The schema of the model; its label must be a {@link CategoricalLabel}.
 * @return The encoded model.
 * @throws IllegalArgumentException If the coefficient matrix has neither one row, nor three or more rows.
 */
@Override
public Model encodeModel(Schema schema){
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	boolean hasProbabilityDistribution = hasProbabilityDistribution();

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	if(numberOfClasses == 1){
		SchemaUtil.checkSize(2, categoricalLabel);

		return RegressionModelUtil.createBinaryLogisticClassification(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, 0), intercept.get(0), RegressionModel.NormalizationMethod.LOGIT, hasProbabilityDistribution, schema);
	} else

	if(numberOfClasses >= 3){
		SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

		Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

		List<RegressionModel> regressionModels = new ArrayList<>();

		// One regression model per class, each exposing its decision function value as an output field
		for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){
			RegressionModel regressionModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i), intercept.get(i), RegressionModel.NormalizationMethod.LOGIT, segmentSchema)
				.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

			regressionModels.add(regressionModel);
		}

		return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SIMPLEMAX, hasProbabilityDistribution, schema);
	} else

	{
		// Report the offending value, so that the failure can be diagnosed from the stack trace alone
		throw new IllegalArgumentException("Expected a coefficient matrix with one row (binary) or three or more rows (multiclass), got " + numberOfClasses);
	}
}