Java Code Examples for org.dmg.pmml.DataType

The following examples show how to use org.dmg.pmml.DataType. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Wraps every incoming feature into a derived field that applies the configured universal function.
 *
 * @param features The features to transform. Each one is converted to a continuous feature first.
 * @param encoder The encoder that owns the derived fields.
 *
 * @return The incoming list itself if no function is configured, otherwise a new list holding one continuous feature per incoming feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	UFunc ufunc = getFunc();

	// Nothing to apply - hand the features back untouched
	if(ufunc == null){
		return features;
	}

	List<Feature> encodedFeatures = new ArrayList<>();

	for(Feature feature : features){
		ContinuousFeature input = feature.toContinuousFeature();

		// The expression is supplied lazily; the encoder decides whether it needs to be built
		DerivedField derivedField = encoder.ensureDerivedField(
			FeatureUtil.createName(ufunc.getName(), input),
			OpType.CONTINUOUS,
			DataType.DOUBLE,
			() -> UFuncUtil.encodeUFunc(ufunc, Collections.singletonList(input.ref()))
		);

		encodedFeatures.add(new ContinuousFeature(encoder, derivedField));
	}

	return encodedFeatures;
}
 
Example 2
/**
 * Checks which data type is detected for string, integer-like and double-like literals.
 */
@Test
public void getConstantDataType(){
	// The empty string is a string constant
	assertEquals(DataType.STRING, TypeUtil.getConstantDataType(""));

	// Optionally signed whole numbers
	for(String integerLiteral : new String[]{"-1", "1", "+1"}){
		assertEquals(DataType.INTEGER, TypeUtil.getConstantDataType(integerLiteral));
	}

	// No decimal point and not a whole number
	for(String stringLiteral : new String[]{"1E0", "1X"}){
		assertEquals(DataType.STRING, TypeUtil.getConstantDataType(stringLiteral));
	}

	// Decimal point present, with or without sign and exponent
	for(String doubleLiteral : new String[]{"-1.0", "1.0", "+1.0", "1.0E-1", "1.0E1", "1.0E+1"}){
		assertEquals(DataType.DOUBLE, TypeUtil.getConstantDataType(doubleLiteral));
	}

	// Decimal point present, but not a number
	assertEquals(DataType.STRING, TypeUtil.getConstantDataType("1.0X"));
}
 
Example 3
/**
 * Encodes a regex tokenizer as a single document feature.
 *
 * Only the splitter mode with a minimum token length of one is accepted.
 * When lowercasing is enabled, the document feature is based on a derived field
 * that applies the lowercase function to the input.
 *
 * @param encoder The encoder that provides the input feature and owns the derived field.
 *
 * @return A singleton list holding the document feature.
 *
 * @throws IllegalArgumentException If the tokenizer is in token matching mode, or its minimum token length is not one.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	RegexTokenizer tokenizer = getTransformer();

	if(!tokenizer.getGaps()){
		throw new IllegalArgumentException("Expected splitter mode, got token matching mode");
	}

	if(tokenizer.getMinTokenLength() != 1){
		throw new IllegalArgumentException("Expected 1 as minimum token length, got " + tokenizer.getMinTokenLength() + " as minimum token length");
	}

	Feature input = encoder.getOnlyFeature(tokenizer.getInputCol());

	// Start from the raw input field; replaced below if lowercasing is requested
	Field<?> documentField = input.getField();

	if(tokenizer.getToLowercase()){
		Apply lowercase = PMMLUtil.createApply(PMMLFunctions.LOWERCASE, input.ref());

		documentField = encoder.createDerivedField(FeatureUtil.createName("lowercase", input), OpType.CATEGORICAL, DataType.STRING, lowercase);
	}

	DocumentFeature documentFeature = new DocumentFeature(encoder, documentField, tokenizer.getPattern());

	return Collections.singletonList(documentFeature);
}
 
Example 4
/**
 * Maps a data type to an operational type.
 *
 * @param dataType The data type.
 *
 * @return Categorical for strings and booleans, continuous for numeric types, ordinal for dates and date-times.
 *
 * @throws IllegalArgumentException If the data type is none of the above.
 */
static
public OpType getOpType(DataType dataType){

	switch(dataType){
		// Discrete, unordered values
		case STRING:
		case BOOLEAN:
			return OpType.CATEGORICAL;
		// Numeric values
		case INTEGER:
		case FLOAT:
		case DOUBLE:
			return OpType.CONTINUOUS;
		// Temporal values
		case DATE:
		case DATE_TIME:
			return OpType.ORDINAL;
		default:
			throw new IllegalArgumentException();
	}
}
 
Example 5
/**
 * Creates a categorical label for the target field.
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories, or <code>null</code> to have them generated from the number of classes.
 * @param encoder The encoder that creates the data field.
 *
 * @return A categorical label; backed by an integer data field when the categories were generated, and by a string data field otherwise.
 *
 * @throws IllegalArgumentException If the number of explicitly given categories differs from the number of classes.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){

	// No categories given - generate them
	if(targetCategories == null){
		List<?> generatedCategories = LabelUtil.createTargetCategories(this.num_class);

		DataField integerField = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.INTEGER, generatedCategories);

		return new CategoricalLabel(integerField);
	}

	if(targetCategories.size() != this.num_class){
		throw new IllegalArgumentException("Expected " + this.num_class + " target categories, got " + targetCategories.size() + " target categories");
	}

	DataField stringField = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.STRING, targetCategories);

	return new CategoricalLabel(stringField);
}
 
Example 6
Source Project: jpmml-r   Source File: RExpUtil.java    License: GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Translates a type name to a data type.
 *
 * @param type The type name; one of "character", "factor", "numeric" or "logical".
 *
 * @return String for "character" and "factor", double for "numeric", boolean for "logical".
 *
 * @throws IllegalArgumentException If the type name is not recognized. The message is the type name.
 */
static
public DataType getDataType(String type){

	switch(type){
		case "character":
		case "factor":
			return DataType.STRING;
		case "numeric":
			return DataType.DOUBLE;
		case "logical":
			return DataType.BOOLEAN;
		default:
			throw new IllegalArgumentException(type);
	}
}
 
Example 7
/**
 * Converts a value to a local date.
 *
 * A local date is returned as is, a local date-time is reduced to its date part.
 *
 * @param value The value to convert.
 *
 * @return The local date.
 *
 * @throws TypeCheckException If the value is neither a local date nor a local date-time.
 *
 * @see DataType#DATE
 */
static
private LocalDate toDate(Object value){

	if(value instanceof LocalDate){
		return (LocalDate)value;
	}

	if(value instanceof LocalDateTime){
		return ((LocalDateTime)value).toLocalDate();
	}

	throw new TypeCheckException(DataType.DATE, value);
}
 
Example 8
/**
 * Transforms a schema so that every non-binary feature becomes a continuous feature of the given data type.
 *
 * @param dataType The data type that is requested for the continuous features.
 * @param schema The schema to transform.
 *
 * @return The transformed schema.
 */
static
private Schema toTreeModelSchema(DataType dataType, Schema schema){
	Function<Feature, Feature> converter = new Function<Feature, Feature>(){

		@Override
		public Feature apply(Feature feature){

			// Binary features pass through unchanged
			if(feature instanceof BinaryFeature){
				return feature;
			}

			return feature.toContinuousFeature(dataType);
		}
	};

	return schema.toTransformedSchema(converter);
}
 
Example 9
/**
 * Sets the operational type and the data type of a data field, unless the field is frozen.
 *
 * @param dataField The data field to update.
 * @param opType The new operational type.
 * @param dataType The requested data type. A request for double does not override a different data type that the field already has.
 * @param encoder The encoder that knows which fields are frozen.
 *
 * @return The same data field instance.
 */
public DataField updateDataField(DataField dataField, OpType opType, DataType dataType, SkLearnEncoder encoder){
	FieldName fieldName = dataField.getName();

	// Frozen fields must not be modified
	if(encoder.isFrozen(fieldName)){
		return dataField;
	}

	switch(dataType){
		case DOUBLE:
			{
				DataType existingDataType = dataField.getDataType();

				// Double is the default request; a field that carries some other data type keeps it
				if(!(DataType.DOUBLE).equals(existingDataType)){
					dataType = existingDataType;
				}
			}
			break;
		default:
			break;
	}

	dataField.setOpType(opType).setDataType(dataType);

	return dataField;
}
 
Example 10
/**
 * Determines the data type from the first transformer, falling back to the final estimator.
 *
 * @return The data type of the first transformer if there is one, otherwise the data type of the final estimator.
 *
 * @throws UnsupportedOperationException If neither a transformer nor a final estimator is available.
 */
@Override
public DataType getDataType(){

	if(hasTransformers()){
		List<? extends Transformer> transformers = getTransformers();

		// Only the first transformer is consulted
		if(!transformers.isEmpty()){
			Transformer firstTransformer = transformers.get(0);

			return firstTransformer.getDataType();
		}
	}

	if(hasFinalEstimator()){
		return getFinalEstimator().getDataType();
	}

	throw new UnsupportedOperationException();
}
 
Example 11
/**
 * Detects the data type of a constant from its string representation.
 *
 * @param value The string representation of the constant.
 *
 * @return String for the empty string and for anything that fails to parse;
 * double for "NaN", "INF" and "-INF" (case-insensitive) and for parseable values that contain a decimal point;
 * integer for values without a decimal point that parse as a long.
 */
static
public DataType getConstantDataType(String value){

	if(("").equals(value)){
		return DataType.STRING;
	}

	boolean specialDouble = ("NaN").equalsIgnoreCase(value) || ("INF").equalsIgnoreCase(value) || ("-INF").equalsIgnoreCase(value);
	if(specialDouble){
		return DataType.DOUBLE;
	}

	// The decimal point alone decides between the floating-point and the whole number parser
	boolean hasDecimalPoint = (value.indexOf('.') > -1);

	try {
		if(hasDecimalPoint){
			Double.parseDouble(value);

			return DataType.DOUBLE;
		}

		Long.parseLong(value);

		return DataType.INTEGER;
	} catch(NumberFormatException nfe){
		// Not a number after all
		return DataType.STRING;
	}
}
 
Example 12
/**
 * Evaluates the function on two arguments.
 *
 * @param first The first argument.
 * @param second The second argument.
 *
 * @return A continuous field value whose data type is the common data type of the two arguments.
 */
@Override
public FieldValue evaluate(FieldValue first, FieldValue second){
	// The result takes the common data type of both operands
	DataType commonDataType = TypeUtil.getCommonDataType(first.getDataType(), second.getDataType());

	Integer outcome = evaluate(first.asNumber(), second.asNumber());

	return FieldValueUtil.create(commonDataType, OpType.CONTINUOUS, outcome);
}
 
Example 13
/**
 * Looks up a key, after checking that the requested data type matches the data type of this object.
 *
 * @param dataType The data type of the key.
 * @param key The key to look up.
 *
 * @return The result of the map lookup for the key.
 *
 * @throws TypeCheckException If the requested data type differs from {@link #getDataType()}.
 */
default
public V get(DataType dataType, Object key){
	Map<?, V> entries = getMap();

	DataType expectedDataType = getDataType();
	if(!expectedDataType.equals(dataType)){
		throw new TypeCheckException(expectedDataType, key);
	}

	return entries.get(key);
}
 
Example 14
/**
 * Registers "NaN" as a missing value on float and double data fields.
 *
 * @param dataField The data field being visited.
 *
 * @return The action of the superclass visit.
 */
@Override
public VisitorAction visit(DataField dataField){

	switch(dataField.getDataType()){
		// Floating-point fields only
		case FLOAT:
		case DOUBLE:
			{
				List<String> missingValues = Collections.singletonList("NaN");

				PMMLUtil.addValues(dataField, missingValues, Property.MISSING);
			}
			break;
		default:
			break;
	}

	return super.visit(dataField);
}
 
Example 15
/**
 * Encodes the trees as a mining model and wraps it into a regression with exponential normalization.
 *
 * @param trees The trees.
 * @param numIteration Passed on to the superclass encoder.
 * @param schema The schema of the final model.
 *
 * @return The regression-wrapped mining model.
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	// The inner model is encoded against the anonymous form of the schema
	Schema anonymousSchema = schema.toAnonymousSchema();

	MiningModel innerModel = super.encodeMiningModel(trees, numIteration, anonymousSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createRegression(innerModel, RegressionModel.NormalizationMethod.EXP, schema);
}
 
Example 16
/**
 * Creates a continuous label for the target field.
 *
 * @param targetField The name of the target field.
 * @param targetCategories Must be <code>null</code> or empty.
 * @param encoder The encoder that creates the data field.
 *
 * @return A continuous label backed by a double data field.
 *
 * @throws IllegalArgumentException If any target categories are given.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	boolean hasCategories = (targetCategories != null) && !targetCategories.isEmpty();

	if(hasCategories){
		throw new IllegalArgumentException("Regression requires zero target categories");
	}

	DataField targetDataField = encoder.createDataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE);

	return new ContinuousLabel(targetDataField);
}
 
Example 17
/**
 * Determines the data type from the keys of the mapping.
 *
 * @return The data type reported for the mapping keys; string is passed as the second argument of the lookup.
 */
@Override
public DataType getDataType(){
	Map<?, ?> mapping = getMapping();

	// Work on a copy of the keys
	List<Object> keys = new ArrayList<>(mapping.keySet());

	return TypeUtil.getDataType(keys, DataType.STRING);
}
 
Example 18
/**
 * Translates a dtype name to a data type.
 *
 * @param dtype The dtype name; either "datetime64[D]" or "datetime64[s]".
 *
 * @return Date for "datetime64[D]", date-time for "datetime64[s]".
 *
 * @throws IllegalArgumentException If the dtype name is not recognized. The message is the dtype name.
 */
static
public DataType parseDataType(String dtype){

	switch(dtype){
		case "datetime64[D]":
			return DataType.DATE;
		case "datetime64[s]":
			return DataType.DATE_TIME;
		default:
			break;
	}

	throw new IllegalArgumentException(dtype);
}
 
Example 19
Source Project: jpmml-r   Source File: RExpEncoder.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a data field, deriving the data type from the values when none is given.
 *
 * @param name The name of the data field.
 * @param opType The operational type.
 * @param dataType The data type, or <code>null</code> to have it determined from the values.
 * @param values The values of the data field.
 *
 * @return The data field created by the superclass.
 */
@Override
public DataField createDataField(FieldName name, OpType opType, DataType dataType, List<?> values){
	DataType effectiveDataType = (dataType != null ? dataType : TypeUtil.getDataType(values));

	return super.createDataField(name, opType, effectiveDataType, values);
}
 
Example 20
/**
 * Encodes an index-to-string transformer as a single categorical feature.
 *
 * @param encoder The encoder that creates the data field.
 *
 * @return A singleton list holding a categorical feature over a string data field, whose values are the labels of the transformer.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString indexToString = getTransformer();

	DataField labelField = encoder.createDataField(formatName(indexToString), OpType.CATEGORICAL, DataType.STRING, Arrays.asList(indexToString.getLabels()));

	CategoricalFeature labelFeature = new CategoricalFeature(encoder, labelField);

	return Collections.singletonList(labelFeature);
}
 
Example 21
/**
 * Builds a mapping from parsed valid values to their one-based position.
 *
 * @return An insertion-ordered map, where the first valid value maps to 1, the second to 2, and so on.
 *
 * @throws MissingAttributeException If a value element has no value.
 * @throws InvalidAttributeException If a value element is declared invalid or missing.
 * @throws UnsupportedAttributeException If a value element has any other property.
 */
private Map<Object, Integer> parseValues(){
	DataType dataType = getDataType();

	Map<Object, Integer> indices = new LinkedHashMap<>();

	// Number of valid values seen so far
	int validCount = 0;

	for(Value pmmlValue : getValues()){
		Object rawValue = pmmlValue.getValue();
		if(rawValue == null){
			throw new MissingAttributeException(pmmlValue, PMMLAttributes.VALUE_VALUE);
		}

		Value.Property property = pmmlValue.getProperty();
		switch(property){
			case VALID:
				{
					Object parsedValue = TypeUtil.parseOrCast(dataType, rawValue);

					indices.put(parsedValue, ++validCount);
				}
				break;
			// Only valid values are acceptable here
			case INVALID:
			case MISSING:
				throw new InvalidAttributeException(pmmlValue, property);
			default:
				throw new UnsupportedAttributeException(pmmlValue, property);
		}
	}

	return indices;
}
 
Example 22
/**
 * Converts a number to an integer via its long value.
 *
 * @param value The number to convert.
 *
 * @return The long value of the number as an integer.
 *
 * @throws TypeCheckException If the long value does not fit into an integer. The arithmetic exception is attached as the cause.
 */
static
private Integer toInteger(Number value){
	long longValue = value.longValue();

	try {
		return Math.toIntExact(longValue);
	} catch(ArithmeticException ae){
		throw new TypeCheckException(DataType.INTEGER, value)
			.initCause(ae);
	}
}
 
Example 23
/**
 * Evaluates the aggregate function over the non-missing arguments.
 *
 * @param arguments The arguments. Missing ones are skipped.
 *
 * @return The missing value if the statistic has seen no values,
 * otherwise a continuous field value holding the statistic.
 * Its data type is obtained from {@link #getResultDataType(DataType)}, applied to the common data type of the non-missing arguments.
 */
@Override
public FieldValue evaluate(List<FieldValue> arguments){
	StorelessUnivariateStatistic statistic = createStatistic();

	// Common data type of the arguments processed so far
	DataType commonDataType = null;

	for(int index = 0; index < arguments.size(); index++){
		FieldValue argument = getOptionalArgument(arguments, index);

		// "Missing values in the input to an aggregate function are simply ignored"
		if(FieldValueUtil.isMissing(argument)){
			continue;
		}

		statistic.increment((argument.asNumber()).doubleValue());

		commonDataType = (commonDataType == null ? argument.getDataType() : TypeUtil.getCommonDataType(commonDataType, argument.getDataType()));
	}

	// "If all inputs are missing, then the result evaluates to a missing value"
	if(statistic.getN() == 0){
		return FieldValues.MISSING_VALUE;
	}

	Double aggregate = statistic.getResult();

	return FieldValueUtil.create(getResultDataType(commonDataType), OpType.CONTINUOUS, aggregate);
}
 
Example 24
Source Project: jpmml-r   Source File: GBMConverter.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the tree models as a mining model and wraps it into a binary logistic classification.
 *
 * @param treeModels The tree models.
 * @param initF Passed on to the mining model factory method.
 * @param coefficient The coefficient; passed on negated.
 * @param schema The schema of the final model.
 *
 * @return The classification-wrapped mining model.
 */
private MiningModel encodeBinaryClassification(List<TreeModel> treeModels, Double initF, double coefficient, Schema schema){
	// The inner model is encoded against the anonymous regressor form of the schema
	Schema regressorSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);

	MiningModel innerModel = createMiningModel(treeModels, initF, regressorSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(innerModel, -coefficient, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
 
Example 25
Source Project: oryx   Source File: AppPMMLUtilsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Asserts the name, operational type and data type of a data field.
 *
 * @param field the data field to check
 * @param name the expected name
 * @param categorical {@code null} if neither type is expected to be set; {@code true} for
 *     categorical/string; {@code false} for continuous/double
 */
private static void checkDataField(DataField field, String name, Boolean categorical) {
  assertEquals(name, field.getName().getValue());

  // Untyped field: neither attribute may be set
  if (categorical == null) {
    assertNull(field.getOpType());
    assertNull(field.getDataType());
    return;
  }

  OpType expectedOpType = categorical ? OpType.CATEGORICAL : OpType.CONTINUOUS;
  DataType expectedDataType = categorical ? DataType.STRING : DataType.DOUBLE;

  assertEquals(expectedOpType, field.getOpType());
  assertEquals(expectedDataType, field.getDataType());
}
 
Example 26
Source Project: jpmml-sklearn   Source File: KMeans.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the cluster centers as a center-based clustering model.
 *
 * Each cluster is identified by its row index.
 * The cluster size is the number of labels equal to that index, or unset if no labels are available.
 *
 * @param schema The schema that supplies the label and the features.
 *
 * @return The clustering model.
 */
@Override
public ClusteringModel encodeModel(Schema schema){
	int[] shape = getClusterCentersShape();

	int rows = shape[0];
	int columns = shape[1];

	List<? extends Number> clusterCenters = getClusterCenters();
	List<Integer> labels = getLabels();

	// Occurrence count per label
	Multiset<Integer> labelCounts = HashMultiset.create();

	if(labels != null){
		labelCounts.addAll(labels);
	}

	// Does not change while the clusters are being built
	boolean hasLabelCounts = (labelCounts.size() > 0);

	List<Cluster> clusters = new ArrayList<>();

	for(int row = 0; row < rows; row++){
		Integer size = (hasLabelCounts ? labelCounts.count(row) : null);

		Cluster cluster = new Cluster(PMMLUtil.createRealArray(CMatrixUtil.getRow(clusterCenters, rows, columns, row)))
			.setId(String.valueOf(row))
			.setSize(size);

		clusters.add(cluster);
	}

	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
		.setCompareFunction(CompareFunction.ABS_DIFF);

	return new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, rows, ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters)
		.setOutput(ClusteringModelUtil.createOutput(FieldName.create("Cluster"), DataType.DOUBLE, clusters));
}
 
Example 27
/**
 * Creates an application of the defined function to the feature and the value.
 *
 * @return An apply element that names the defined function, and takes the feature reference and the value, as a string constant, as arguments.
 */
public Apply createApply(){
	DefineFunction function = getDefineFunction();
	Feature argument = getFeature();
	String literal = getValue();

	// The value is always passed on as a string constant
	Constant stringConstant = PMMLUtil.createConstant(literal, DataType.STRING);

	return PMMLUtil.createApply(function.getName(), argument.ref(), stringConstant);
}
 
Example 28
/**
 * Checks the translation of logical expressions: null checks combined with "and", and comparisons combined with "or".
 */
@Test
public void translateLogicalExpression(){
	FieldRef first = new FieldRef(FieldName.create("x1"));
	FieldRef second = new FieldRef(FieldName.create("x2"));

	// Case 1: conjunction of null checks
	Apply firstIsMissing = PMMLUtil.createApply(PMMLFunctions.ISMISSING)
		.addExpressions(first);

	// "not(isnotnull(..))" -> "isnull(..)"
	Apply secondIsMissing = PMMLUtil.createApply(PMMLFunctions.ISMISSING)
		.addExpressions(second);

	Apply expectedAnd = PMMLUtil.createApply(PMMLFunctions.AND)
		.addExpressions(firstIsMissing)
		.addExpressions(secondIsMissing);

	checkExpression(expectedAnd, "isnull(x1) and not(isnotnull(x2))");

	// Case 2: disjunction of comparisons against zero
	Apply firstLessOrEqual = PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL)
		.addExpressions(first, PMMLUtil.createConstant(0, DataType.DOUBLE));

	Apply secondGreaterOrEqual = PMMLUtil.createApply(PMMLFunctions.GREATEROREQUAL)
		.addExpressions(second, PMMLUtil.createConstant(0, DataType.DOUBLE));

	Apply expectedOr = PMMLUtil.createApply(PMMLFunctions.OR)
		.addExpressions(firstLessOrEqual)
		.addExpressions(secondGreaterOrEqual);

	checkExpression(expectedOr, "(x1 <= 0) or (x2 >= 0)");
}
 
Example 29
/**
 * Determines the single data type that is shared by the categories of all features.
 *
 * @return The shared data type, or string if none could be determined.
 *
 * @throws UnsupportedOperationException If the categories of different features disagree on the data type.
 */
@Override
public DataType getDataType(){
	DataType sharedDataType = null;

	for(List<?> featureCategories : getCategories()){
		DataType featureDataType = TypeUtil.getDataType(featureCategories, null);

		// Nothing settled yet - adopt whatever this feature reports
		if(sharedDataType == null){
			sharedDataType = featureDataType;

			continue;
		}

		if(!sharedDataType.equals(featureDataType)){
			throw new UnsupportedOperationException();
		}
	}

	return (sharedDataType != null ? sharedDataType : DataType.STRING);
}
 
Example 30
/**
 * Encodes the trees as a mining model and wraps it into a regression with logit normalization.
 *
 * @param trees The trees.
 * @param weights Passed on to the mining model factory method.
 * @param base_score Passed on to the mining model factory method.
 * @param ntreeLimit Passed on to the mining model factory method.
 * @param schema The schema of the final model.
 *
 * @return The regression-wrapped mining model.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
	// The inner model is encoded against the anonymous form of the schema
	Schema anonymousSchema = schema.toAnonymousSchema();

	MiningModel innerModel = createMiningModel(trees, weights, base_score, ntreeLimit, anonymousSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createRegression(innerModel, RegressionModel.NormalizationMethod.LOGIT, schema);
}