Java Code Examples for org.jpmml.converter.PMMLUtil

The following examples show how to use org.jpmml.converter.PMMLUtil. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Encodes the input column of the underlying {@link RegexTokenizer} as a single document feature.
 *
 * <p>
 * Only the splitter ("gaps") mode with a minimum token length of 1 is accepted.
 * When lowercasing is enabled, the document field is replaced by a derived string field
 * that applies the PMML lowercase function to the input feature.
 * </p>
 *
 * @param encoder The encoder that resolves the input feature and creates derived fields.
 * @return A singleton list holding the document feature built from the field and the tokenizer pattern.
 * @throws IllegalArgumentException If the tokenizer is in token matching mode, or if its minimum token length is not 1.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	RegexTokenizer tokenizer = getTransformer();

	boolean splitterMode = tokenizer.getGaps();
	if(!splitterMode){
		throw new IllegalArgumentException("Expected splitter mode, got token matching mode");
	}

	boolean unitMinTokenLength = (tokenizer.getMinTokenLength() == 1);
	if(!unitMinTokenLength){
		throw new IllegalArgumentException("Expected 1 as minimum token length, got " + tokenizer.getMinTokenLength() + " as minimum token length");
	}

	Feature inputFeature = encoder.getOnlyFeature(tokenizer.getInputCol());

	// Start from the raw field; swap in the lowercased derived field only when requested
	Field<?> documentField = inputFeature.getField();

	if(tokenizer.getToLowercase()){
		Apply lowercase = PMMLUtil.createApply(PMMLFunctions.LOWERCASE, inputFeature.ref());

		documentField = encoder.createDerivedField(FeatureUtil.createName("lowercase", inputFeature), OpType.CATEGORICAL, DataType.STRING, lowercase);
	}

	Feature documentFeature = new DocumentFeature(encoder, documentField, tokenizer.getPattern());

	return Collections.singletonList(documentFeature);
}
 
Example 2
/**
 * Encodes the underlying {@link LinearSVCModel} as a mining model.
 *
 * <p>
 * A regression model is built from the coefficients and the intercept.
 * Its predicted output is named "margin" and is post-processed by a threshold transformation
 * that uses the threshold of the SVC model.
 * </p>
 *
 * @param schema The schema of the classification problem.
 * @return The mining model produced by {@code MiningModelUtil.createBinaryLogisticClassification}.
 */
@Override
public MiningModel encodeModel(Schema schema){
	LinearSVCModel svcModel = getTransformer();

	Transformation thresholdTransformation = new AbstractTransformation(){

		@Override
		public Expression createExpression(FieldRef fieldRef){
			return PMMLUtil.createApply(PMMLFunctions.THRESHOLD, fieldRef, PMMLUtil.createConstant(svcModel.getThreshold()));
		}
	};

	Schema regressorSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);

	Model marginModel = LinearModelUtil.createRegression(this, svcModel.coefficients(), svcModel.intercept(), regressorSchema);

	marginModel.setOutput(ModelUtil.createPredictedOutput(FieldName.create("margin"), OpType.CONTINUOUS, DataType.DOUBLE, thresholdTransformation));

	return MiningModelUtil.createBinaryLogisticClassification(marginModel, 1d, 0d, RegressionModel.NormalizationMethod.NONE, false, schema);
}
 
Example 3
/**
 * Encodes the underlying {@link KMeansModel} as a center-based clustering model.
 *
 * <p>
 * Every cluster center becomes one {@link Cluster}, identified by its zero-based position
 * in the array of cluster centers.
 * </p>
 *
 * @param schema The schema that provides the label and the features.
 * @return The clustering model, which uses the squared Euclidean distance measure.
 */
@Override
public ClusteringModel encodeModel(Schema schema){
	KMeansModel kMeansModel = getTransformer();

	Vector[] centers = kMeansModel.clusterCenters();

	List<Cluster> clusters = new ArrayList<>(centers.length);

	for(Vector center : centers){
		// The identifier is the position of the center, which equals the number of clusters collected so far
		String id = String.valueOf(clusters.size());

		Cluster cluster = new Cluster(PMMLUtil.createRealArray(VectorUtil.toList(center)));
		cluster.setId(id);

		clusters.add(cluster);
	}

	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean());
	comparisonMeasure.setCompareFunction(CompareFunction.ABS_DIFF);

	return new ClusteringModel(
		MiningFunction.CLUSTERING,
		ClusteringModel.ModelClass.CENTER_BASED,
		clusters.size(),
		ModelUtil.createMiningSchema(schema.getLabel()),
		comparisonMeasure,
		ClusteringModelUtil.createClusteringFields(schema.getFeatures()),
		clusters
	);
}
 
Example 4
/**
 * Checks that a negated quotient is translated into a multiplication by the constant -1.
 */
@Test
public void translateArithmeticExpression(){
	// (x1 - 1)
	Apply numerator = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, new FieldRef(FieldName.create("x1")), PMMLUtil.createConstant(1, DataType.DOUBLE));

	// (x2 + 1)
	Apply denominator = PMMLUtil.createApply(PMMLFunctions.ADD, new FieldRef(FieldName.create("x2")), PMMLUtil.createConstant(1, DataType.DOUBLE));

	Apply quotient = PMMLUtil.createApply(PMMLFunctions.DIVIDE, numerator, denominator);

	Apply expected = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, PMMLUtil.createConstant(-1), quotient);

	checkExpression(expected, "-((x1 - 1) / (x2 + 1))");
}
 
Example 5
/**
 * Checks that a two-branch CASE WHEN expression is translated into nested PMML "if" function applications.
 */
@Test
public void translateCaseWhenExpression(){
	FieldRef x1 = new FieldRef(FieldName.create("x1"));
	FieldRef x2 = new FieldRef(FieldName.create("x2"));

	Constant zero = PMMLUtil.createConstant(0, DataType.DOUBLE);

	// WHEN x1 < 0
	Apply x1IsNegative = PMMLUtil.createApply(PMMLFunctions.LESSTHAN, x1, zero);

	// WHEN x2 > 0
	Apply x2IsPositive = PMMLUtil.createApply(PMMLFunctions.GREATERTHAN, x2, zero);

	// The second WHEN branch and the ELSE branch make up the "else" part of the outer "if"
	Apply otherwise = PMMLUtil.createApply(PMMLFunctions.IF, x2IsPositive, x2, zero);

	Apply expected = PMMLUtil.createApply(PMMLFunctions.IF, x1IsNegative, x1, otherwise);

	checkExpression(expected, "CASE WHEN x1 < 0 THEN x1 WHEN x2 > 0 THEN x2 ELSE 0 END");
}
 
Example 6
/**
 * Checks that an "if" function call with an "in" test is translated into PMML "if" and "isIn" function applications.
 */
@Test
public void translateIfExpression(){
	// status in (-1, 1)
	Apply statusTest = PMMLUtil.createApply(PMMLFunctions.ISIN, new FieldRef(FieldName.create("status")), PMMLUtil.createConstant(-1), PMMLUtil.createConstant(1));

	// x1 != 0
	Apply x1IsNonZero = PMMLUtil.createApply(PMMLFunctions.NOTEQUAL, new FieldRef(FieldName.create("x1")), PMMLUtil.createConstant(0, DataType.DOUBLE));

	// x2 != 0
	Apply x2IsNonZero = PMMLUtil.createApply(PMMLFunctions.NOTEQUAL, new FieldRef(FieldName.create("x2")), PMMLUtil.createConstant(0, DataType.DOUBLE));

	Apply expected = PMMLUtil.createApply(PMMLFunctions.IF, statusTest, x1IsNonZero, x2IsNonZero);

	checkExpression(expected, "if(status in (-1, 1), x1 != 0, x2 != 0)");
}
 
Example 7
/**
 * Encodes the trees as a mining model whose "xgbValue" output is post-processed by a threshold transformation.
 *
 * <p>
 * The transformation applies the PMML threshold function to the predicted value and the constant {@code 0f},
 * and names the resulting field {@code hinge(<name>)}.
 * </p>
 *
 * @param trees The regression trees.
 * @param weights The tree weights.
 * @param base_score The base score.
 * @param ntreeLimit The tree limit, passed through to {@code createMiningModel}.
 * @param schema The schema of the classification problem.
 * @return The mining model produced by {@code MiningModelUtil.createBinaryLogisticClassification}.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
	Transformation hinge = new FunctionTransformation(PMMLFunctions.THRESHOLD){

		@Override
		public FieldName getName(FieldName name){
			return FieldName.create("hinge(" + name + ")");
		}

		@Override
		public Expression createExpression(FieldRef fieldRef){
			Apply threshold = (Apply)super.createExpression(fieldRef);

			// Append the threshold value as an extra argument
			return threshold.addExpressions(PMMLUtil.createConstant(0f));
		}
	};

	Schema regressorSchema = schema.toAnonymousRegressorSchema(DataType.FLOAT);

	MiningModel valueModel = createMiningModel(trees, weights, base_score, ntreeLimit, regressorSchema);

	valueModel.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT, hinge));

	return MiningModelUtil.createBinaryLogisticClassification(valueModel, 1d, 0d, RegressionModel.NormalizationMethod.NONE, true, schema);
}
 
Example 8
/**
 * Extends the function definition of the superclass with the term frequency adjustments of the underlying {@link TfidfTransformer}.
 *
 * <p>
 * With sublinear term frequency scaling, the expression becomes {@code ln(expression) + 1}.
 * With inverse document frequency weighting, a "weight" parameter field is added to the function,
 * and the expression is multiplied by it.
 * </p>
 *
 * @return The function definition of the superclass, updated in place.
 */
@Override
public DefineFunction encodeDefineFunction(){
	TfidfTransformer tfidf = getTransformer();

	DefineFunction function = super.encodeDefineFunction();

	Expression termFrequency = function.getExpression();

	if(tfidf.getSublinearTf()){
		Expression logTermFrequency = PMMLUtil.createApply(PMMLFunctions.LN, termFrequency);

		termFrequency = PMMLUtil.createApply(PMMLFunctions.ADD, logTermFrequency, PMMLUtil.createConstant(1d));
	}

	if(tfidf.getUseIdf()){
		ParameterField weightField = new ParameterField(FieldName.create("weight"));

		function.addParameterFields(weightField);

		termFrequency = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, termFrequency, new FieldRef(weightField.getName()));
	}

	function.setExpression(termFrequency);

	return function;
}
 
Example 9
/**
 * Creates a boolean feature that indicates whether the value of a feature is missing.
 *
 * @param feature The feature to test.
 * @param missingValue The value that represents a missing value.
 * If <code>null</code>, the PMML "isMissing" function is used instead of an equality comparison.
 * @param encoder The encoder that creates the derived field.
 * @return A boolean feature backed by a "missing_indicator" derived field.
 */
static
public Feature encodeIndicatorFeature(Feature feature, Object missingValue, SkLearnEncoder encoder){
	Expression fieldRef = feature.ref();

	Expression indicator = (missingValue == null)
		? PMMLUtil.createApply(PMMLFunctions.ISMISSING, fieldRef)
		: PMMLUtil.createApply(PMMLFunctions.EQUAL, fieldRef, PMMLUtil.createConstant(missingValue, feature.getDataType()));

	DerivedField indicatorField = encoder.createDerivedField(FeatureUtil.createName("missing_indicator", feature), OpType.CATEGORICAL, DataType.BOOLEAN, indicator);

	return new BooleanFeature(encoder, indicatorField);
}
 
Example 10
/**
 * Combines all input features into a single continuous feature by applying the configured function to them.
 *
 * @param features The input features.
 * @param encoder The encoder that creates the derived field.
 * @return The input list as-is if it holds at most one feature;
 * otherwise a singleton list holding the derived continuous feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	String function = getFunction();

	// Nothing to combine
	if(features.size() < 2){
		return features;
	}

	Apply combination = PMMLUtil.createApply(translateFunction(function));

	for(int i = 0; i < features.size(); i++){
		Feature feature = features.get(i);

		combination.addExpressions(feature.ref());
	}

	DerivedField combinationField = encoder.createDerivedField(FeatureUtil.createName(function, features), OpType.CONTINUOUS, DataType.DOUBLE, combination);

	Feature result = new ContinuousFeature(encoder, combinationField);

	return Collections.singletonList(result);
}
 
Example 11
/**
 * Encodes the single string input feature as a derived string field
 * that applies the PMML "replace" function with the configured pattern and replacement.
 *
 * @param features The input features. Checked to be of size 1.
 * @param encoder The encoder that creates the derived field.
 * @return A singleton list holding the string feature backed by the "replace" derived field.
 * @throws IllegalArgumentException If the data type of the input feature is not string.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	String pattern = getPattern();
	String replacement = getReplacement();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	DataType dataType = feature.getDataType();
	if(!(DataType.STRING).equals(dataType)){
		// Report the offending data type instead of failing with an empty message
		throw new IllegalArgumentException("Expected " + DataType.STRING + " data type, got " + dataType + " data type");
	}

	Apply apply = PMMLUtil.createApply(PMMLFunctions.REPLACE)
		.addExpressions(feature.ref())
		.addExpressions(PMMLUtil.createConstant(pattern, DataType.STRING), PMMLUtil.createConstant(replacement, DataType.STRING));

	DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("replace", feature), OpType.CATEGORICAL, DataType.STRING, apply);

	return Collections.singletonList(new StringFeature(encoder, derivedField));
}
 
Example 12
/**
 * Encodes the single string input feature as a derived string field
 * that applies the PMML "substring" function with the configured begin and end offsets.
 *
 * @param features The input features. Checked to be of size 1.
 * @param encoder The encoder that creates the derived field.
 * @return A singleton list holding the string feature backed by the "substring" derived field.
 * @throws IllegalArgumentException If the begin offset is negative, if the end offset is smaller than the begin offset,
 * or if the data type of the input feature is not string.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Integer begin = getBegin();
	Integer end = getEnd();

	if((begin < 0) || (end < begin)){
		// Report the offending offsets instead of failing with an empty message
		throw new IllegalArgumentException("Expected 0 <= begin <= end, got begin " + begin + " and end " + end);
	}

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	DataType dataType = feature.getDataType();
	if(!(DataType.STRING).equals(dataType)){
		throw new IllegalArgumentException("Expected " + DataType.STRING + " data type, got " + dataType + " data type");
	}

	// The PMML "substring" function takes a one-based start position and a length,
	// hence the (begin + 1) and (end - begin) arguments
	Apply apply = PMMLUtil.createApply(PMMLFunctions.SUBSTRING)
		.addExpressions(feature.ref())
		.addExpressions(PMMLUtil.createConstant(begin + 1, DataType.INTEGER), PMMLUtil.createConstant((end - begin), DataType.INTEGER));

	DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("substring", feature), OpType.CATEGORICAL, DataType.STRING, apply);

	return Collections.singletonList(new StringFeature(encoder, derivedField));
}
 
Example 13
/**
 * Encodes the single string input feature as a derived boolean field
 * that applies the PMML "matches" function with the configured pattern.
 *
 * @param features The input features. Checked to be of size 1.
 * @param encoder The encoder that creates the derived field.
 * @return A singleton list holding the boolean feature backed by the "matches" derived field.
 * @throws IllegalArgumentException If the data type of the input feature is not string.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	String pattern = getPattern();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	DataType dataType = feature.getDataType();
	if(!(DataType.STRING).equals(dataType)){
		// Report the offending data type instead of failing with an empty message
		throw new IllegalArgumentException("Expected " + DataType.STRING + " data type, got " + dataType + " data type");
	}

	Apply apply = PMMLUtil.createApply(PMMLFunctions.MATCHES)
		.addExpressions(feature.ref())
		.addExpressions(PMMLUtil.createConstant(pattern, DataType.STRING));

	DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("matches", feature), OpType.CATEGORICAL, DataType.BOOLEAN, apply);

	return Collections.singletonList(new BooleanFeature(encoder, derivedField));
}
 
Example 14
Source Project: jpmml-r   Source File: FormulaUtil.java    License: GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes an "ifelse" function expression as a PMML "if" function application.
 *
 * <p>
 * The "test", "yes" and "no" arguments are looked up by name and position (0, 1 and 2, respectively),
 * and are registered with the variable map before being converted to PMML expressions.
 * </p>
 *
 * @param functionExpression The function expression that provides the arguments.
 * @param expressionFields The variable map, which is updated with all three arguments.
 * @param encoder The encoder passed on to {@code prepareExpression}.
 * @return The PMML "if" function application over the test, yes and no expressions.
 */
static
private Expression encodeIfElseExpression(FunctionExpression functionExpression, VariableMap expressionFields, RExpEncoder encoder){
	FunctionExpression.Argument test = functionExpression.getArgument("test", 0);

	expressionFields.putAll(test);

	FunctionExpression.Argument yes = functionExpression.getArgument("yes", 1);
	FunctionExpression.Argument no = functionExpression.getArgument("no", 2);

	expressionFields.putAll(yes);
	expressionFields.putAll(no);

	Expression condition = prepareExpression(test, expressionFields, encoder);
	Expression whenTrue = prepareExpression(yes, expressionFields, encoder);
	Expression whenFalse = prepareExpression(no, expressionFields, encoder);

	// XXX: "Missing values in test give missing values in the result"
	return PMMLUtil.createApply(PMMLFunctions.IF, condition, whenTrue, whenFalse);
}
 
Example 15
Source Project: jpmml-r   Source File: FormulaUtil.java    License: GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates a MapValues element after completing the mapping with identity entries.
 *
 * <p>
 * Every category that is not among the output values of the initial mapping is added to the mapping as a <code>category -> category</code> entry.
 * The mapping is modified in place.
 * </p>
 *
 * @param name The name of the input field.
 * @param mapping The input value to output value mapping. Must be mutable.
 * @param categories The categories to complete the mapping with.
 * @return The MapValues element created from the name and the completed mapping.
 */
static
private MapValues createMapValues(FieldName name, Map<String, String> mapping, List<String> categories){
	// Snapshot of the initial output values; the identity entries added below must not affect the check
	Set<String> outputs = new LinkedHashSet<>(mapping.values());

	for(String category : categories){

		// Assume disjoint input and output value spaces
		// NOTE(review): a category that is an input value, but not an output value, has its mapping replaced by an identity entry - confirm that callers never pass such categories
		if(outputs.contains(category)){
			continue;
		}

		mapping.put(category, category);
	}

	return PMMLUtil.createMapValues(name, mapping);
}
 
Example 16
/**
 * Creates a hinge function <code>max(difference, 0)</code> over a feature and a cut point.
 *
 * @param dir The direction. The difference is <code>cut - feature</code> for -1, and <code>feature - cut</code> for 1.
 * @param feature The feature.
 * @param cut The cut point.
 * @return The PMML "max" function application over the difference and the constant 0.
 * @throws IllegalArgumentException If the direction is neither -1 nor 1.
 */
static
private Apply createHingeFunction(int dir, Feature feature, double cut){
	Expression expression;

	switch(dir){
		case -1:
			expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, PMMLUtil.createConstant(cut), feature.ref());
			break;
		case 1:
			expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, feature.ref(), PMMLUtil.createConstant(cut));
			break;
		default:
			// Report the offending direction instead of failing with an empty message
			throw new IllegalArgumentException("Expected -1 or 1 as direction, got " + dir);
	}

	return PMMLUtil.createApply(PMMLFunctions.MAX, expression, PMMLUtil.createConstant(0d));
}
 
Example 17
/**
 * Checks the translation of an expression that combines addition, a logarithm, division and exponentiation.
 */
@Test
public void translate(){
	// A / B
	Expression ratio = PMMLUtil.createApply(PMMLFunctions.DIVIDE, new FieldRef(FieldName.create("A")), new FieldRef(FieldName.create("B")));

	// log(A / B)
	Expression logRatio = PMMLUtil.createApply(PMMLFunctions.LN, ratio);

	// 1.0 + log(A / B)
	Expression base = PMMLUtil.createApply(PMMLFunctions.ADD, PMMLUtil.createConstant("1.0", DataType.DOUBLE), logRatio);

	Expression expected = PMMLUtil.createApply(PMMLFunctions.POW, base, PMMLUtil.createConstant("2", DataType.INTEGER));

	Expression actual = ExpressionTranslator.translateExpression("(1.0 + log(A / B)) ^ 2");

	assertTrue(ReflectionUtil.equals(expected, actual));
}
 
Example 18
/**
 * Checks that the conjunction becomes the first operand of the disjunction
 * when translating an expression that mixes the "&" and "|" operators.
 */
@Test
public void translateLogicalExpression(){
	// a >= 0.0
	Expression aTest = PMMLUtil.createApply(PMMLFunctions.GREATEROREQUAL, new FieldRef(FieldName.create("a")), PMMLUtil.createConstant("0.0", DataType.DOUBLE));

	// b >= 0.0
	Expression bTest = PMMLUtil.createApply(PMMLFunctions.GREATEROREQUAL, new FieldRef(FieldName.create("b")), PMMLUtil.createConstant("0.0", DataType.DOUBLE));

	// c <= 0.0
	Expression cTest = PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL, new FieldRef(FieldName.create("c")), PMMLUtil.createConstant("0.0", DataType.DOUBLE));

	Expression conjunction = PMMLUtil.createApply(PMMLFunctions.AND, aTest, bTest);

	Expression expected = PMMLUtil.createApply(PMMLFunctions.OR, conjunction, cTest);

	Expression actual = ExpressionTranslator.translateExpression("a >= 0.0 & b >= 0.0 | c <= 0.0");

	assertTrue(ReflectionUtil.equals(expected, actual));
}
 
Example 19
/**
 * Checks that an "if / else if / else" chain is translated into nested PMML "if" function applications.
 */
@Test
public void translateRelationalExpression(){
	// x < 0
	Expression isNegative = PMMLUtil.createApply(PMMLFunctions.LESSTHAN, new FieldRef(FieldName.create("x")), PMMLUtil.createConstant("0", DataType.INTEGER));

	// x > 0
	Expression isPositive = PMMLUtil.createApply(PMMLFunctions.GREATERTHAN, new FieldRef(FieldName.create("x")), PMMLUtil.createConstant("0", DataType.INTEGER));

	// The "else if / else" part makes up the "else" part of the outer "if"
	Expression otherwise = PMMLUtil.createApply(PMMLFunctions.IF, isPositive, PMMLUtil.createConstant("positive", DataType.STRING), PMMLUtil.createConstant("zero", DataType.STRING));

	Expression expected = PMMLUtil.createApply(PMMLFunctions.IF, isNegative, PMMLUtil.createConstant("negative", DataType.STRING), otherwise);

	Expression actual = ExpressionTranslator.translateExpression("if(x < 0) \"negative\" else if(x > 0) \"positive\" else \"zero\"");

	assertTrue(ReflectionUtil.equals(expected, actual));
}
 
Example 20
/**
 * Checks that a chain of additions and subtractions is translated from left to right.
 */
@Test
public void translateArithmeticExpressionChain(){
	// A + B
	Expression sum = PMMLUtil.createApply(PMMLFunctions.ADD, new FieldRef(FieldName.create("A")), new FieldRef(FieldName.create("B")));

	// (A + B) - X
	Expression difference = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, sum, new FieldRef(FieldName.create("X")));

	// ((A + B) - X) + C
	Expression expected = PMMLUtil.createApply(PMMLFunctions.ADD, difference, new FieldRef(FieldName.create("C")));

	Expression actual = ExpressionTranslator.translateExpression("A + B - X + C");

	assertTrue(ReflectionUtil.equals(expected, actual));
}
 
Example 21
/**
 * Registers the "NaN" value with the {@code Property.MISSING} property on float and double data fields.
 *
 * @param dataField The data field being visited. Data fields of other data types are left untouched.
 * @return The result of the superclass visit.
 */
@Override
public VisitorAction visit(DataField dataField){

	switch(dataField.getDataType()){
		case FLOAT:
		case DOUBLE:
			{
				PMMLUtil.addValues(dataField, Collections.singletonList("NaN"), Property.MISSING);
			}
			break;
		default:
			// Not a floating-point data type
			break;
	}

	return super.visit(dataField);
}
 
Example 22
/**
 * Encodes the vocabulary of the underlying {@link CountVectorizerModel} as a list of term features.
 *
 * <p>
 * A "tf@&lt;sequence number&gt;" function is defined over a document parameter and a term parameter, and is registered with the encoder.
 * Its body is a text index element that tokenizes the document using the word separator of the input document feature.
 * Every non-empty stop word set of the document feature is turned into a regex-based text index normalization.
 * </p>
 *
 * @param encoder The encoder that resolves the input feature and receives the function definition.
 * @return One term feature per vocabulary term, in vocabulary order.
 * @throws IllegalArgumentException If the word separator is neither "\\s+" nor "\\W+" (checked only when there is a non-empty stop word set),
 * or if a vocabulary term has punctuation.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	CountVectorizerModel vectorizer = getTransformer();

	DocumentFeature documentFeature = (DocumentFeature)encoder.getOnlyFeature(vectorizer.getInputCol());

	ParameterField documentField = new ParameterField(FieldName.create("document"));
	ParameterField termField = new ParameterField(FieldName.create("term"));

	TextIndex textIndex = new TextIndex(documentField.getName(), new FieldRef(termField.getName()));
	textIndex.setTokenize(Boolean.TRUE);
	textIndex.setWordSeparatorCharacterRE(documentFeature.getWordSeparatorRE());

	// The local term weights are set explicitly in both cases
	TextIndex.LocalTermWeights localTermWeights = null;
	if(vectorizer.getBinary()){
		localTermWeights = TextIndex.LocalTermWeights.BINARY;
	}

	textIndex.setLocalTermWeights(localTermWeights);

	for(DocumentFeature.StopWordSet stopWords : documentFeature.getStopWordSets()){

		if(stopWords.isEmpty()){
			continue;
		}

		String separatorRE = documentFeature.getWordSeparatorRE();

		String stopWordRE;

		switch(separatorRE){
			case "\\s+":
				stopWordRE = "(^|\\s+)\\p{Punct}*(" + JOINER.join(stopWords) + ")\\p{Punct}*(\\s+|$)";
				break;
			case "\\W+":
				stopWordRE = "(\\W+)(" + JOINER.join(stopWords) + ")(\\W+)";
				break;
			default:
				throw new IllegalArgumentException("Expected \"\\s+\" or \"\\W+\" as splitter regex pattern, got \"" + separatorRE + "\"");
		}

		// A single-row table that replaces every stop word match with a single space character
		Map<String, List<String>> row = new LinkedHashMap<>();
		row.put("string", Collections.singletonList(stopWordRE));
		row.put("stem", Collections.singletonList(" "));
		row.put("regex", Collections.singletonList("true"));

		TextIndexNormalization normalization = new TextIndexNormalization(null, PMMLUtil.createInlineTable(row));
		normalization.setCaseSensitive(stopWords.isCaseSensitive());
		// Handles consecutive matches. See http://stackoverflow.com/a/25085385
		normalization.setRecursive(Boolean.TRUE);

		textIndex.addTextIndexNormalizations(normalization);
	}

	String functionName = "tf@" + String.valueOf(CountVectorizerModelConverter.SEQUENCE.getAndIncrement());

	DefineFunction defineFunction = new DefineFunction(functionName, OpType.CONTINUOUS, DataType.INTEGER, null, textIndex);
	defineFunction.addParameterFields(documentField, termField);

	encoder.addDefineFunction(defineFunction);

	String[] vocabulary = vectorizer.vocabulary();

	List<Feature> termFeatures = new ArrayList<>(vocabulary.length);

	for(String term : vocabulary){

		if(TermUtil.hasPunctuation(term)){
			throw new IllegalArgumentException("Punctuated vocabulary terms (" + term + ") are not supported");
		}

		termFeatures.add(new TermFeature(encoder, defineFunction, documentFeature, term));
	}

	return termFeatures;
}
 
Example 23
/**
 * Creates the function application of the superclass, and appends the weight to it as a constant argument.
 *
 * @return The function application with the weight as its last argument.
 */
@Override
public Apply createApply(){
	Number weight = getWeight();

	return super.createApply()
		.addExpressions(PMMLUtil.createConstant(weight));
}
 
Example 24
/**
 * Creates an application of the defined function to the feature and the value.
 *
 * @return The function application, whose arguments are the feature reference and the value as a string constant.
 */
public Apply createApply(){
	DefineFunction function = getDefineFunction();
	Feature feature = getFeature();

	Constant valueConstant = PMMLUtil.createConstant(getValue(), DataType.STRING);

	return PMMLUtil.createApply(function.getName(), feature.ref(), valueConstant);
}
 
Example 25
/**
 * Checks the translation of the "and" and "or" operators, and of null checks.
 */
@Test
public void translateLogicalExpression(){
	FieldRef x1 = new FieldRef(FieldName.create("x1"));
	FieldRef x2 = new FieldRef(FieldName.create("x2"));

	Apply x1IsMissing = PMMLUtil.createApply(PMMLFunctions.ISMISSING, x1);

	// "not(isnotnull(..)) -> "isnull(..)"
	Apply x2IsMissing = PMMLUtil.createApply(PMMLFunctions.ISMISSING, x2);

	Apply expectedAnd = PMMLUtil.createApply(PMMLFunctions.AND, x1IsMissing, x2IsMissing);

	checkExpression(expectedAnd, "isnull(x1) and not(isnotnull(x2))");

	// x1 <= 0
	Apply x1Test = PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL, x1, PMMLUtil.createConstant(0, DataType.DOUBLE));

	// x2 >= 0
	Apply x2Test = PMMLUtil.createApply(PMMLFunctions.GREATEROREQUAL, x2, PMMLUtil.createConstant(0, DataType.DOUBLE));

	Apply expectedOr = PMMLUtil.createApply(PMMLFunctions.OR, x1Test, x2Test);

	checkExpression(expectedOr, "(x1 <= 0) or (x2 >= 0)");
}
 
Example 26
/**
 * Creates the function application of the superclass, and, if inverse document frequency weighting is enabled,
 * appends the weight at the given index to it as a constant argument.
 *
 * @param function The name of the function.
 * @param feature The feature.
 * @param index The index of the term, used for looking up the weight.
 * @param term The term.
 * @return The function application.
 */
@Override
public Apply encodeApply(String function, Feature feature, int index, String term){
	TfidfTransformer tfidf = getTransformer();

	Apply apply = super.encodeApply(function, feature, index, term);

	if(!tfidf.getUseIdf()){
		return apply;
	}

	Number weight = tfidf.getWeight(index);

	apply.addExpressions(PMMLUtil.createConstant(weight));

	return apply;
}
 
Example 27
/**
 * Encodes the model of the superclass, and replaces its output with a "decisionFunction" predicted output
 * that is post-processed by an outlier transformation.
 *
 * <p>
 * The outlier transformation is the expression <code>decisionFunction &lt;= 0</code>.
 * The first output field is marked as a final result.
 * </p>
 *
 * @param schema The schema passed on to the superclass.
 * @return The support vector machine model with the updated output.
 * @throws IllegalArgumentException If the output does not hold exactly two output fields.
 */
@Override
public SupportVectorMachineModel encodeModel(Schema schema){
	Transformation outlier = new OutlierTransformation(){

		@Override
		public Expression createExpression(FieldRef fieldRef){
			return PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL, fieldRef, PMMLUtil.createConstant(0d));
		}
	};

	SupportVectorMachineModel supportVectorMachineModel = super.encodeModel(schema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE, outlier));

	Output output = supportVectorMachineModel.getOutput();

	List<OutputField> outputFields = output.getOutputFields();
	if(outputFields.size() != 2){
		// Report the actual count instead of failing with an empty message
		throw new IllegalArgumentException("Expected 2 output fields, got " + outputFields.size() + " output fields");
	}

	OutputField decisionFunctionOutputField = outputFields.get(0);

	if(!decisionFunctionOutputField.isFinalResult()){
		decisionFunctionOutputField.setFinalResult(true);
	}

	return supportVectorMachineModel;
}
 
Example 28
Source Project: jpmml-sklearn   Source File: KMeans.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the cluster centers as a center-based clustering model with a "Cluster" output.
 *
 * <p>
 * Every row of the cluster centers matrix becomes one {@link Cluster}, identified by its zero-based row index.
 * The cluster size is the number of occurrences of the row index among the labels;
 * it is left unset if there are no labels.
 * </p>
 *
 * @param schema The schema that provides the label and the features.
 * @return The clustering model, which uses the squared Euclidean distance measure.
 */
@Override
public ClusteringModel encodeModel(Schema schema){
	int[] shape = getClusterCentersShape();

	int rows = shape[0];
	int columns = shape[1];

	List<? extends Number> clusterCenters = getClusterCenters();
	List<Integer> labels = getLabels();

	Multiset<Integer> labelCounts = HashMultiset.create();

	if(labels != null){
		labelCounts.addAll(labels);
	}

	boolean hasLabelCounts = !labelCounts.isEmpty();

	List<Cluster> clusters = new ArrayList<>();

	for(int row = 0; row < rows; row++){
		Cluster cluster = new Cluster(PMMLUtil.createRealArray(CMatrixUtil.getRow(clusterCenters, rows, columns, row)));
		cluster.setId(String.valueOf(row));

		Integer size = null;
		if(hasLabelCounts){
			size = labelCounts.count(row);
		}

		cluster.setSize(size);

		clusters.add(cluster);
	}

	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean());
	comparisonMeasure.setCompareFunction(CompareFunction.ABS_DIFF);

	ClusteringModel clusteringModel = new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, rows, ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters);

	clusteringModel.setOutput(ClusteringModelUtil.createOutput(FieldName.create("Cluster"), DataType.DOUBLE, clusters));

	return clusteringModel;
}
 
Example 29
/**
 * Encodes a feature as a categorical feature whose continuous representation is backed by an index-mapped derived field.
 *
 * @param feature The feature, which is converted to categorical with the given categories.
 * @param categories The categories.
 * @param indexCategories The index values of the categories. Checked to be of the same size as the categories.
 * @param mapMissingTo The "map missing to" value of the MapValues element.
 * @param defaultValue The default value of the MapValues element.
 * @param dataType The data type of the derived field.
 * @param encoder The encoder that ensures the "encoder" derived field.
 * @return A categorical feature over the original feature, whose {@code toContinuousFeature()} delegates to the index feature.
 */
static
public Feature encodeIndexFeature(Feature feature, List<?> categories, List<? extends Number> indexCategories, Number mapMissingTo, Number defaultValue, DataType dataType, SkLearnEncoder encoder){
	ClassDictUtil.checkSize(categories, indexCategories);

	encoder.toCategorical(feature.getName(), categories);

	// The MapValues element is built only when the supplier is invoked
	Supplier<MapValues> mapValuesSupplier = () -> PMMLUtil.createMapValues(feature.getName(), categories, indexCategories)
		.setMapMissingTo(mapMissingTo)
		.setDefaultValue(defaultValue);

	DerivedField indexField = encoder.ensureDerivedField(FeatureUtil.createName("encoder", feature), OpType.CATEGORICAL, dataType, mapValuesSupplier);

	Feature indexFeature = new IndexFeature(encoder, indexField, indexCategories);

	return new CategoricalFeature(encoder, feature, categories){

		@Override
		public ContinuousFeature toContinuousFeature(){
			return indexFeature.toContinuousFeature();
		}
	};
}
 
Example 30
/**
 * Creates a DiscrStats element from a pair of value and count arrays.
 *
 * @param dataType The data type, passed on to {@code standardizeValues}.
 * @param objects The values at index 0, and their counts at index 1. Checked to be of the same size.
 * @return A DiscrStats element holding a string array of the standardized values, and an integer array of the counts.
 */
static
public DiscrStats createDiscrStats(DataType dataType, Object[] objects){
	List<Object> values = (List)asArray(objects[0]);
	List<Integer> counts = ValueUtil.asIntegers((List)asArray(objects[1]));

	ClassDictUtil.checkSize(values, counts);

	DiscrStats result = new DiscrStats();

	result.addArrays(PMMLUtil.createStringArray(standardizeValues(dataType, values)), PMMLUtil.createIntArray(counts));

	return result;
}