org.dmg.pmml.DataType Java Examples

The following examples show how to use org.dmg.pmml.DataType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Composite.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Resolves the data type of this composite.
 *
 * When transformers are present and the list is non-empty, the data type of the
 * first transformer is returned. Otherwise the final estimator, if any, is asked.
 *
 * @return the data type reported by the first transformer or by the final estimator.
 * @throws UnsupportedOperationException if neither source yields a data type.
 */
@Override
public DataType getDataType(){

	if(hasTransformers()){
		List<? extends Transformer> steps = getTransformers();

		// Only the head of the transformer list is ever consulted
		if(!steps.isEmpty()){
			return steps.get(0).getDataType();
		}
	}

	if(hasFinalEstimator()){
		return getFinalEstimator().getDataType();
	}

	throw new UnsupportedOperationException();
}
 
Example #2
Source File: Transformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Applies an operational type and a data type to a data field.
 *
 * Fields that the encoder reports as frozen are returned untouched. When the
 * requested data type is DOUBLE and the field currently carries something other
 * than DOUBLE, the field's current data type is re-applied instead.
 *
 * @param dataField the field to update.
 * @param opType the operational type to set.
 * @param dataType the requested data type.
 * @param encoder the encoder that is asked whether the field is frozen.
 * @return the same data field instance.
 */
public DataField updateDataField(DataField dataField, OpType opType, DataType dataType, SkLearnEncoder encoder){

	if(encoder.isFrozen(dataField.getName())){
		return dataField;
	}

	switch(dataType){
		case DOUBLE:
			{
				// Keep whatever the DataField element already declares, unless that is DOUBLE as well
				DataType declaredDataType = dataField.getDataType();

				if(declaredDataType != DataType.DOUBLE){
					dataType = declaredDataType;
				}
			}
			break;
		default:
			break;
	}

	dataField.setOpType(opType).setDataType(dataType);

	return dataField;
}
 
Example #3
Source File: TreeUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Produces a transformed schema in which every feature that is not a
 * BinaryFeature has been converted to a continuous feature of the given data type.
 *
 * @param dataType the data type passed to the continuous feature conversion.
 * @param schema the schema to transform.
 * @return the result of applying the per-feature conversion to the schema.
 */
static
private Schema toTreeModelSchema(DataType dataType, Schema schema){
	Function<Feature, Feature> converter = (Feature feature) -> {

		// Binary features pass through as-is
		if(feature instanceof BinaryFeature){
			return feature;
		}

		return feature.toContinuousFeature(dataType);
	};

	return schema.toTransformedSchema(converter);
}
 
Example #4
Source File: TypeUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Converts a value to a local date.
 *
 * A LocalDate is returned as-is; a LocalDateTime is truncated to its date part.
 *
 * @param value the value to convert.
 * @return the value as a LocalDate.
 * @throws TypeCheckException if the value is neither a LocalDate nor a LocalDateTime.
 *
 * @see DataType#DATE
 */
static
private LocalDate toDate(Object value){

	if(value instanceof LocalDateTime){
		return ((LocalDateTime)value).toLocalDate();
	}

	if(value instanceof LocalDate){
		return (LocalDate)value;
	}

	throw new TypeCheckException(DataType.DATE, value);
}
 
Example #5
Source File: RExpUtil.java    From jpmml-r with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Maps a type name to a data type.
 *
 * @param type one of "character", "factor", "numeric" or "logical".
 * @return STRING for "character" and "factor", DOUBLE for "numeric", BOOLEAN for "logical".
 * @throws IllegalArgumentException carrying the offending name, for any other value.
 */
static
public DataType getDataType(String type){

	switch(type){
		case "numeric":
			return DataType.DOUBLE;
		case "logical":
			return DataType.BOOLEAN;
		case "character":
		case "factor":
			return DataType.STRING;
		default:
			throw new IllegalArgumentException(type);
	}
}
 
Example #6
Source File: TransformerUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Maps a data type to an operational type.
 *
 * @param dataType the data type to classify.
 * @return CATEGORICAL for STRING and BOOLEAN, CONTINUOUS for INTEGER, FLOAT and DOUBLE,
 * ORDINAL for DATE and DATE_TIME.
 * @throws IllegalArgumentException for any other data type.
 */
static
public OpType getOpType(DataType dataType){

	switch(dataType){
		case STRING:
		case BOOLEAN:
			return OpType.CATEGORICAL;
		case INTEGER:
		case FLOAT:
		case DOUBLE:
			return OpType.CONTINUOUS;
		case DATE:
		case DATE_TIME:
			return OpType.ORDINAL;
		default:
			break;
	}

	throw new IllegalArgumentException();
}
 
Example #7
Source File: RegexTokenizerConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the tokenizer as a single document feature built from the input column's
 * field and the tokenizer's pattern.
 *
 * When lowercasing is enabled, the document feature is built on a derived field that
 * applies the PMML lowercase function to the input feature.
 *
 * @param encoder the encoder used to look up the input feature and to create derived fields.
 * @return a singleton list holding the document feature.
 * @throws IllegalArgumentException if the tokenizer is not in gaps mode, or if its
 * minimum token length is not 1.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	RegexTokenizer tokenizer = getTransformer();

	// Validate the tokenizer configuration before touching the encoder
	if(!tokenizer.getGaps()){
		throw new IllegalArgumentException("Expected splitter mode, got token matching mode");
	}

	if(tokenizer.getMinTokenLength() != 1){
		throw new IllegalArgumentException("Expected 1 as minimum token length, got " + tokenizer.getMinTokenLength() + " as minimum token length");
	}

	Feature inputFeature = encoder.getOnlyFeature(tokenizer.getInputCol());

	Field<?> documentField = inputFeature.getField();

	if(tokenizer.getToLowercase()){
		Apply lowercaseApply = PMMLUtil.createApply(PMMLFunctions.LOWERCASE, inputFeature.ref());

		documentField = encoder.createDerivedField(FeatureUtil.createName("lowercase", inputFeature), OpType.CATEGORICAL, DataType.STRING, lowercaseApply);
	}

	DocumentFeature documentFeature = new DocumentFeature(encoder, documentField, tokenizer.getPattern());

	return Collections.singletonList(documentFeature);
}
 
Example #8
Source File: TypeUtilTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks the data type that TypeUtil#getConstantDataType(String) reports for
 * empty, integer-like, double-like and malformed literals.
 */
@Test
public void getConstantDataType(){
	assertEquals(DataType.STRING, TypeUtil.getConstantDataType(""));

	// Signed and unsigned whole numbers
	for(String literal : new String[]{"-1", "1", "+1"}){
		assertEquals(DataType.INTEGER, TypeUtil.getConstantDataType(literal));
	}

	// No decimal point, and not parseable as a whole number
	for(String literal : new String[]{"1E0", "1X"}){
		assertEquals(DataType.STRING, TypeUtil.getConstantDataType(literal));
	}

	// Decimal point present, with or without sign and exponent
	for(String literal : new String[]{"-1.0", "1.0", "+1.0", "1.0E-1", "1.0E1", "1.0E+1"}){
		assertEquals(DataType.DOUBLE, TypeUtil.getConstantDataType(literal));
	}

	assertEquals(DataType.STRING, TypeUtil.getConstantDataType("1.0X"));
}
 
Example #9
Source File: Classification.java    From jpmml-xgboost with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates a categorical label for the target field.
 *
 * Without explicit target categories, categories are generated from the number of
 * classes and the data field is typed INTEGER. With explicit target categories,
 * their count must match the number of classes and the data field is typed STRING.
 *
 * @param targetField the name of the target field.
 * @param targetCategories the target categories, or null to have them generated.
 * @param encoder the encoder used to create the data field.
 * @return a categorical label backed by the created data field.
 * @throws IllegalArgumentException if the number of explicit target categories differs
 * from the number of classes.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	DataType dataType;

	if(targetCategories != null){

		if(targetCategories.size() != this.num_class){
			throw new IllegalArgumentException("Expected " + this.num_class + " target categories, got " + targetCategories.size() + " target categories");
		}

		dataType = DataType.STRING;
	} else

	{
		targetCategories = LabelUtil.createTargetCategories(this.num_class);

		dataType = DataType.INTEGER;
	}

	DataField dataField = encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories);

	return new CategoricalLabel(dataField);
}
 
Example #10
Source File: FunctionTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Wraps every input feature in a derived field that applies the configured function.
 *
 * When no function is configured, the input features are returned unchanged.
 *
 * @param features the input features; each one is converted to a continuous feature.
 * @param encoder the encoder used to look up or create the derived fields.
 * @return one continuous feature per input feature, or the input list itself when
 * there is no function.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	UFunc func = getFunc();

	if(func == null){
		return features;
	}

	List<Feature> result = new ArrayList<>();

	for(Feature feature : features){
		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		DerivedField derivedField = encoder.ensureDerivedField(
			FeatureUtil.createName(func.getName(), continuousFeature),
			OpType.CONTINUOUS,
			DataType.DOUBLE,
			() -> UFuncUtil.encodeUFunc(func, Collections.singletonList(continuousFeature.ref()))
		);

		result.add(new ContinuousFeature(encoder, derivedField));
	}

	return result;
}
 
Example #11
Source File: OrdinalEncoder.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Determines the single data type shared by all per-feature category lists.
 *
 * @return the common data type, or STRING when no data type could be determined.
 * @throws UnsupportedOperationException if two category lists resolve to different data types.
 */
@Override
public DataType getDataType(){
	DataType common = null;

	for(List<?> featureCategories : getCategories()){
		DataType dataType = TypeUtil.getDataType(featureCategories, null);

		if(common == null){
			// Nothing recorded so far; adopt the current result
			common = dataType;
		} else

		if(!(common).equals(dataType)){
			throw new UnsupportedOperationException();
		}
	}

	return (common != null ? common : DataType.STRING);
}
 
Example #12
Source File: RExpEncoder.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a data field, deriving the data type from the values when none is given.
 *
 * @param name the field name.
 * @param opType the operational type.
 * @param dataType the data type, or null to have it derived from the values.
 * @param values the field values.
 * @return the data field created by the superclass.
 */
@Override
public DataField createDataField(FieldName name, OpType opType, DataType dataType, List<?> values){
	DataType effectiveDataType = (dataType != null ? dataType : TypeUtil.getDataType(values));

	return super.createDataField(name, opType, effectiveDataType, values);
}
 
Example #13
Source File: RichDataFieldTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks the value map of a RichDataField, first for a STRING-typed field and
 * then for the same field re-typed as INTEGER.
 */
@Test
public void getValueMapping(){
	Value invalid = createValue("0", Value.Property.INVALID);
	Value explicitlyValid = createValue("1", Value.Property.VALID);
	Value implicitlyValidTwo = createValue("2", null);
	Value implicitlyValidThree = createValue("3", null);
	Value missing = createValue("N/A", Value.Property.MISSING);

	DataField dataField = new DataField(FieldName.create("x"), OpType.CATEGORICAL, DataType.STRING)
		.addValues(invalid, explicitlyValid, implicitlyValidTwo, implicitlyValidThree, missing);

	// STRING data type: all five values are expected in the map, keyed by string
	Map<?, Integer> valueMap = new RichDataField(dataField).getMap();

	assertEquals(5, valueMap.size());

	assertEquals(FieldValue.STATUS_UNKNOWN_INVALID, valueMap.get("0"));
	assertEquals(Integer.valueOf(1), valueMap.get("1"));
	assertEquals(Integer.valueOf(2), valueMap.get("2"));
	assertEquals(Integer.valueOf(3), valueMap.get("3"));
	assertEquals(FieldValue.STATUS_MISSING, valueMap.get("N/A"));

	// INTEGER data type: four entries are expected, keyed by integer
	dataField.setDataType(DataType.INTEGER);

	valueMap = new RichDataField(dataField).getMap();

	assertEquals(4, valueMap.size());

	assertEquals(FieldValue.STATUS_UNKNOWN_INVALID, valueMap.get(0));
	assertEquals(Integer.valueOf(1), valueMap.get(1));
	assertEquals(Integer.valueOf(2), valueMap.get(2));
	assertEquals(Integer.valueOf(3), valueMap.get(3));
}
 
Example #14
Source File: ContinuousValue.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a FLOAT-typed value and flags it as not valid when the stored value is NaN.
 *
 * @param value the value handed to the superclass constructor.
 */
ContinuousFloat(Object value){
	super(DataType.FLOAT, value);

	if(((Float)getValue()).isNaN()){
		setValid(false);
	}
}
 
Example #15
Source File: ExpressionTranslatorTest.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks the translation of two logical expressions: a conjunction of null checks,
 * and a disjunction of numeric comparisons.
 */
@Test
public void translateLogicalExpression(){
	FieldRef first = new FieldRef(FieldName.create("x1"));
	FieldRef second = new FieldRef(FieldName.create("x2"));

	String conjunction = "isnull(x1) and not(isnotnull(x2))";

	Apply firstIsMissing = PMMLUtil.createApply(PMMLFunctions.ISMISSING)
		.addExpressions(first);

	// "not(isnotnull(..))" -> "isnull(..)"
	Apply secondIsMissing = PMMLUtil.createApply(PMMLFunctions.ISMISSING)
		.addExpressions(second);

	Apply expectedAnd = PMMLUtil.createApply(PMMLFunctions.AND)
		.addExpressions(firstIsMissing)
		.addExpressions(secondIsMissing);

	checkExpression(expectedAnd, conjunction);

	String disjunction = "(x1 <= 0) or (x2 >= 0)";

	Apply firstLessOrEqual = PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL)
		.addExpressions(first, PMMLUtil.createConstant(0, DataType.DOUBLE));

	Apply secondGreaterOrEqual = PMMLUtil.createApply(PMMLFunctions.GREATEROREQUAL)
		.addExpressions(second, PMMLUtil.createConstant(0, DataType.DOUBLE));

	Apply expectedOr = PMMLUtil.createApply(PMMLFunctions.OR)
		.addExpressions(firstLessOrEqual)
		.addExpressions(secondGreaterOrEqual);

	checkExpression(expectedOr, disjunction);
}
 
Example #16
Source File: TermFeature.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates an application of the defined function to this feature's reference and
 * to this feature's value as a STRING constant.
 *
 * @return the Apply element invoking the defined function by name.
 */
public Apply createApply(){
	DefineFunction function = getDefineFunction();
	Feature termFeature = getFeature();
	String term = getValue();

	return PMMLUtil.createApply(
		function.getName(),
		termFeature.ref(),
		PMMLUtil.createConstant(term, DataType.STRING)
	);
}
 
Example #17
Source File: KMeans.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the cluster centers as a center-based clustering model.
 *
 * One cluster is created per row of the cluster centers matrix. A cluster's size is
 * the number of occurrences of its index among the labels, or left unset when no
 * labels are available.
 *
 * @param schema the schema providing the label and the features.
 * @return the clustering model, with an output field named "Cluster".
 */
@Override
public ClusteringModel encodeModel(Schema schema){
	int[] shape = getClusterCentersShape();

	int rows = shape[0];
	int columns = shape[1];

	List<? extends Number> clusterCenters = getClusterCenters();
	List<Integer> labels = getLabels();

	Multiset<Integer> labelCounts = HashMultiset.create();

	if(labels != null){
		labelCounts.addAll(labels);
	}

	// The multiset is not modified below, so this check is evaluated once
	boolean hasLabelCounts = (labelCounts.size() > 0);

	List<Cluster> clusters = new ArrayList<>();

	for(int row = 0; row < rows; row++){
		Integer size = null;

		if(hasLabelCounts){
			size = labelCounts.count(row);
		}

		Cluster cluster = new Cluster(PMMLUtil.createRealArray(CMatrixUtil.getRow(clusterCenters, rows, columns, row)))
			.setId(String.valueOf(row))
			.setSize(size);

		clusters.add(cluster);
	}

	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
		.setCompareFunction(CompareFunction.ABS_DIFF);

	return new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, rows, ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters)
		.setOutput(ClusteringModelUtil.createOutput(FieldName.create("Cluster"), DataType.DOUBLE, clusters));
}
 
Example #18
Source File: AppPMMLUtilsTest.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts a data field's name, operational type and data type.
 *
 * @param field the data field to check.
 * @param name the expected field name.
 * @param categorical null to expect no operational type and no data type; true to expect
 *  CATEGORICAL/STRING; false to expect CONTINUOUS/DOUBLE.
 */
private static void checkDataField(DataField field, String name, Boolean categorical) {
  assertEquals(name, field.getName().getValue());

  if (categorical == null) {
    assertNull(field.getOpType());
    assertNull(field.getDataType());
    return;
  }

  OpType expectedOpType = categorical ? OpType.CATEGORICAL : OpType.CONTINUOUS;
  DataType expectedDataType = categorical ? DataType.STRING : DataType.DOUBLE;

  assertEquals(expectedOpType, field.getOpType());
  assertEquals(expectedDataType, field.getDataType());
}
 
Example #19
Source File: GBMConverter.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Wraps the tree models into a mining model that predicts a continuous DOUBLE output
 * named "gbmValue", and turns it into a binary logistic classification.
 *
 * @param treeModels the tree models.
 * @param initF the initial value passed to the mining model factory.
 * @param coefficient the coefficient; its negation is passed to the classification factory.
 * @param schema the schema of the resulting classification model.
 * @return the binary logistic classification model.
 */
private MiningModel encodeBinaryClassification(List<TreeModel> treeModels, Double initF, double coefficient, Schema schema){
	// The inner model is built on an anonymous regressor schema of DOUBLE data type
	MiningModel gbmModel = createMiningModel(treeModels, initF, schema.toAnonymousRegressorSchema(DataType.DOUBLE))
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(gbmModel, -coefficient, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
 
Example #20
Source File: IndexToStringConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the transformer as a single categorical feature whose STRING-typed data
 * field lists the transformer's labels as values.
 *
 * @param encoder the encoder used to create the data field.
 * @return a singleton list holding the categorical feature.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString indexToString = getTransformer();

	List<String> labels = Arrays.asList(indexToString.getLabels());

	DataField labelField = encoder.createDataField(formatName(indexToString), OpType.CATEGORICAL, DataType.STRING, labels);

	return Collections.singletonList(new CategoricalFeature(encoder, labelField));
}
 
Example #21
Source File: NaNAsMissingDecorator.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Registers "NaN" as a value with the MISSING property on FLOAT and DOUBLE data
 * fields, then delegates to the superclass.
 *
 * @param dataField the data field being visited.
 * @return the action returned by the superclass.
 */
@Override
public VisitorAction visit(DataField dataField){

	switch(dataField.getDataType()){
		case DOUBLE:
		case FLOAT:
			PMMLUtil.addValues(dataField, Collections.singletonList("NaN"), Property.MISSING);
			break;
		default:
			// Other data types are left untouched
			break;
	}

	return super.visit(dataField);
}
 
Example #22
Source File: LogisticRegression.java    From jpmml-xgboost with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds a mining model that predicts a continuous FLOAT output named "xgbValue",
 * and wraps it into a regression with LOGIT normalization.
 *
 * @param trees the regression trees.
 * @param weights the tree weights.
 * @param base_score the base score.
 * @param ntreeLimit the tree limit passed to the mining model factory.
 * @param schema the schema of the resulting regression model.
 * @return the regression model.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
	// The inner model is built on the anonymous variant of the schema
	MiningModel xgbModel = createMiningModel(trees, weights, base_score, ntreeLimit, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createRegression(xgbModel, RegressionModel.NormalizationMethod.LOGIT, schema);
}
 
Example #23
Source File: Regression.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a continuous label backed by a DOUBLE-typed data field.
 *
 * @param targetField the name of the target field.
 * @param targetCategories the target categories; must be null or empty.
 * @param encoder the encoder used to create the data field.
 * @return the continuous label.
 * @throws IllegalArgumentException if any target categories are given.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	boolean hasTargetCategories = (targetCategories != null) && !targetCategories.isEmpty();

	if(hasTargetCategories){
		throw new IllegalArgumentException("Regression requires zero target categories");
	}

	return new ContinuousLabel(encoder.createDataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE));
}
 
Example #24
Source File: TypeUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Infers the data type of a constant from its string representation.
 *
 * The empty string is a STRING. "NaN", "INF" and "-INF" (in any letter case) are
 * DOUBLE. Otherwise, a value containing a '.' is a DOUBLE if it parses as a double,
 * a value without a '.' is an INTEGER if it parses as a long, and anything that
 * fails to parse is a STRING.
 *
 * @param value the string representation of the constant.
 * @return STRING, INTEGER or DOUBLE.
 */
static
public DataType getConstantDataType(String value){

	if(("").equals(value)){
		return DataType.STRING;
	}

	boolean specialDouble = ("NaN").equalsIgnoreCase(value) || ("INF").equalsIgnoreCase(value) || ("-INF").equalsIgnoreCase(value);

	if(specialDouble){
		return DataType.DOUBLE;
	}

	// The presence of a decimal point alone decides which parser is tried
	boolean hasDecimalPoint = (value.indexOf('.') >= 0);

	try {
		if(hasDecimalPoint){
			Double.parseDouble(value);

			return DataType.DOUBLE;
		}

		Long.parseLong(value);

		return DataType.INTEGER;
	} catch(NumberFormatException nfe){
		return DataType.STRING;
	}
}
 
Example #25
Source File: MemoryMeasurerTest.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that a data field created with null operational and data types measures the
 * same size as one created with CONTINUOUS and DOUBLE, given the same interval.
 */
@Test
public void measure(){
	Interval unitInterval = new Interval(Interval.Closure.CLOSED_CLOSED)
		.setLeftMargin(0d)
		.setRightMargin(1d);

	DataField untyped = new DataField(FieldName.create("x"), null, null)
		.addIntervals(unitInterval);

	DataField typed = new DataField(FieldName.create("x"), OpType.CONTINUOUS, DataType.DOUBLE)
		.addIntervals(unitInterval);

	assertEquals(getSize(untyped), getSize(typed));
}
 
Example #26
Source File: CategoricalValue.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Compares this value to another field value.
 *
 * When the other value is a ScalarValue of BOOLEAN data type, both sides are compared
 * as booleans. In every other case the comparison is delegated to the overload that
 * takes the other value's raw value.
 *
 * @param value the field value to compare against.
 * @return the comparison result.
 */
@Override
public int compareToValue(FieldValue value){

	if(value instanceof ScalarValue){
		ScalarValue other = (ScalarValue)value;

		if(other.getDataType() == DataType.BOOLEAN){
			return Boolean.compare(this.asBoolean(), other.asBoolean());
		}
	}

	return compareToValue(value.getValue());
}
 
Example #27
Source File: ExpressionTranslatorTest.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that an if/else expression over a missing-value test translates to an
 * application of the PMML "if" function with boolean constants as its branches.
 */
@Test
public void translateIfExpression(){
	Expression isMissing = PMMLUtil.createApply(PMMLFunctions.ISMISSING)
		.addExpressions(new FieldRef(FieldName.create("x")));

	Expression expected = PMMLUtil.createApply(PMMLFunctions.IF)
		.addExpressions(isMissing)
		.addExpressions(PMMLUtil.createConstant("true", DataType.BOOLEAN), PMMLUtil.createConstant("false", DataType.BOOLEAN));

	Expression actual = ExpressionTranslator.translateExpression("if(is.na(x)) TRUE else FALSE");

	assertTrue(ReflectionUtil.equals(expected, actual));
}
 
Example #28
Source File: SVMConverter.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a formula from the "terms" element of the SVM object, sets the label
 * according to the SVM type, adds the formula's features by the column names of the
 * "SV" element, and finally scales the features.
 *
 * @param encoder the encoder that receives the label and the features.
 */
private void encodeFormula(RExpEncoder encoder){
	RGenericVector svm = getObject();

	RDoubleVector type = svm.getDoubleElement("type");
	RDoubleVector sv = svm.getDoubleElement("SV");
	RVector<?> levels = svm.getVectorElement("levels");
	RExp terms = svm.getElement("terms");
	RGenericVector xlevels = DecorationUtil.getGenericElement(svm, "xlevels");

	// The scalar "type" element is used as an index into the Type constants
	Type svmType = Type.values()[ValueUtil.asInt(type.asScalar())];

	// NOTE(review): rowNames is not read anywhere below; confirm whether the dimnames(0) call is still needed
	RStringVector rowNames = sv.dimnames(0);
	RStringVector columnNames = sv.dimnames(1);

	FormulaContext context = new XLevelsFormulaContext(xlevels);

	Formula formula = FormulaUtil.createFormula(terms, context, encoder);

	switch(svmType){
		// Classification: the label is set from the terms together with the "levels" element
		case C_CLASSIFICATION:
		case NU_CLASSIFICATION:
			FormulaUtil.setLabel(formula, terms, levels, encoder);
			break;
		// One-classification: a continuous DOUBLE label constructed with a null first argument
		case ONE_CLASSIFICATION:
			encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
			break;
		// Regression: the label is set from the terms, with null in place of the levels
		case EPS_REGRESSION:
		case NU_REGRESSION:
			FormulaUtil.setLabel(formula, terms, null, encoder);
			break;
	}

	FormulaUtil.addFeatures(formula, columnNames, true, encoder);

	scaleFeatures(encoder);
}
 
Example #29
Source File: PoissonRegression.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds the superclass mining model with a continuous DOUBLE output named
 * "lgbmValue", and wraps it into a regression with EXP normalization.
 *
 * @param trees the trees.
 * @param numIteration the iteration limit passed to the superclass.
 * @param schema the schema of the resulting regression model.
 * @return the regression model.
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	// The inner model is built on the anonymous variant of the schema
	MiningModel lgbmModel = super.encodeMiningModel(trees, numIteration, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createRegression(lgbmModel, RegressionModel.NormalizationMethod.EXP, schema);
}
 
Example #30
Source File: Functions.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Evaluates the numeric overload on both arguments and wraps the integer result
 * as a continuous field value of the arguments' common data type.
 *
 * @param first the first argument.
 * @param second the second argument.
 * @return the result as a continuous field value.
 */
@Override
public FieldValue evaluate(FieldValue first, FieldValue second){
	// The common data type is resolved before the numeric evaluation takes place
	DataType commonDataType = TypeUtil.getCommonDataType(first.getDataType(), second.getDataType());

	Integer outcome = evaluate(first.asNumber(), second.asNumber());

	return FieldValueUtil.create(commonDataType, OpType.CONTINUOUS, outcome);
}