Java Code Examples for org.dmg.pmml.DataType#STRING

The following examples show how to use org.dmg.pmml.DataType#STRING. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TypeUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * <p>
 * Converts the specified value to its String data type representation.
 * </p>
 *
 * <p>
 * Strings are returned as-is. Values of the boxed numeric types checked below are rendered
 * using their own <code>toString()</code> method. Booleans become <code>"true"</code> or <code>"false"</code>.
 * </p>
 *
 * @throws TypeCheckException If the value is of any other type (this includes <code>null</code>).
 *
 * @see DataType#STRING
 */
static
private String toString(Object value){

	if(value instanceof String){
		return (String)value;
	}

	boolean numeric = (value instanceof Double) || (value instanceof Float) || (value instanceof Long) || (value instanceof Integer) || (value instanceof Short) || (value instanceof Byte);

	if(numeric){
		return value.toString();
	}

	if(value instanceof Boolean){
		boolean flag = (Boolean)value;

		if(flag){
			return "true";
		}

		return "false";
	}

	// Neither a string, a supported number nor a boolean
	throw new TypeCheckException(DataType.STRING, value);
}
 
Example 2
Source File: DiscreteDomain.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Resolves the data type of this domain.
 *
 * An explicitly set dtype takes precedence.
 * Otherwise, if the "with data" flag is set, the decision is delegated to
 * <code>TypeUtil.getDataType</code> with the data and <code>DataType.STRING</code> as arguments.
 * In all other cases the result is <code>DataType.STRING</code>.
 */
@Override
public DataType getDataType(){
	Object dtype = getDType();
	Boolean withData = getWithData();

	if(dtype != null){
		return TransformerUtil.getDataType(dtype);
	}

	if(!withData){
		return DataType.STRING;
	}

	return TypeUtil.getDataType(getData(), DataType.STRING);
}
 
Example 3
Source File: AppPMMLUtilsTest.java    From oryx with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the encodings built for a dictionary holding one continuous field ("foo", index 0)
 * and one categorical field ("bar", index 1) whose values were added in the order "b", "a".
 */
@Test
public void testBuildCategoricalEncoding() {
  DataField fooField =
      new DataField(FieldName.create("foo"), OpType.CONTINUOUS, DataType.DOUBLE);
  DataField barField =
      new DataField(FieldName.create("bar"), OpType.CATEGORICAL, DataType.STRING);
  barField.addValues(new Value("b"), new Value("a"));

  List<DataField> fields = new ArrayList<>();
  fields.add(fooField);
  fields.add(barField);

  DataDictionary dataDictionary = new DataDictionary(fields).setNumberOfFields(fields.size());
  CategoricalValueEncodings valueEncodings =
      AppPMMLUtils.buildCategoricalValueEncodings(dataDictionary);

  // Only the field at index 1 is categorical, and it has two values
  assertEquals(2, valueEncodings.getValueCount(1));

  // Value -> encoding follows the order in which the values were declared
  assertEquals(0, valueEncodings.getValueEncodingMap(1).get("b").intValue());
  assertEquals(1, valueEncodings.getValueEncodingMap(1).get("a").intValue());

  // Encoding -> value is the inverse mapping
  assertEquals("b", valueEncodings.getEncodingValueMap(1).get(0));
  assertEquals("a", valueEncodings.getEncodingValueMap(1).get(1));

  assertEquals(Collections.singletonMap(1, 2), valueEncodings.getCategoryCounts());
}
 
Example 4
Source File: TypeUtil.java    From jpmml-tensorflow with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Maps the TensorFlow data type of an output to the corresponding PMML data type.
 *
 * <p>
 * FLOAT maps to FLOAT, DOUBLE to DOUBLE, INT32 and INT64 to INTEGER, STRING to STRING and BOOL to BOOLEAN.
 * </p>
 *
 * @param output The output whose data type is translated.
 *
 * @throws IllegalArgumentException If the TensorFlow data type has no mapping here.
 */
static
public DataType getDataType(Output output){
	org.tensorflow.DataType dataType = output.dataType();

	switch(dataType){
		case FLOAT:
			return DataType.FLOAT;
		case DOUBLE:
			return DataType.DOUBLE;
		case INT32:
		case INT64:
			return DataType.INTEGER;
		case STRING:
			return DataType.STRING;
		case BOOL:
			return DataType.BOOLEAN;
		default:
			// Name the offending type, so that the failure can be diagnosed from the message alone
			throw new IllegalArgumentException("Unsupported TensorFlow data type: " + dataType);
	}
}
 
Example 5
Source File: RExpUtil.java    From jpmml-r with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Translates an R type name to the corresponding PMML data type.
 *
 * @param type One of "character" or "factor" (STRING), "numeric" (DOUBLE) or "logical" (BOOLEAN).
 *
 * @throws IllegalArgumentException If the type name is not one of the above.
 */
static
public DataType getDataType(String type){

	switch(type){
		case "numeric":
			return DataType.DOUBLE;
		case "logical":
			return DataType.BOOLEAN;
		case "factor":
		case "character":
			return DataType.STRING;
		default:
			throw new IllegalArgumentException(type);
	}
}
 
Example 6
Source File: OutputColumnProducer.java    From jpmml-evaluator-spark with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates the struct field for the output column.
 *
 * When the output field declares no data type, the column falls back to
 * <code>DataType.STRING</code> and the <code>formatString</code> flag of this producer is raised.
 */
@Override
public StructField init(Evaluator evaluator){
	DataType dataType = getField().getDataType();

	boolean untyped = (dataType == null);
	if(untyped){
		this.formatString = true;

		dataType = DataType.STRING;
	}

	return DataTypes.createStructField(getColumnName(), SchemaUtil.translateDataType(dataType), false);
}
 
Example 7
Source File: TypeUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Infers the data type of a constant from its textual form.
 *
 * <ul>
 *   <li>The empty string is a STRING.</li>
 *   <li>"NaN", "INF" and "-INF" (case-insensitive) are DOUBLE.</li>
 *   <li>Text containing a dot is DOUBLE if it parses as a double.</li>
 *   <li>Text without a dot is INTEGER if it parses as a long.</li>
 *   <li>Everything that fails to parse is a STRING.</li>
 * </ul>
 */
static
public DataType getConstantDataType(String value){

	if(("").equals(value)){
		return DataType.STRING;
	}

	boolean special = ("NaN").equalsIgnoreCase(value) || ("INF").equalsIgnoreCase(value) || ("-INF").equalsIgnoreCase(value);
	if(special){
		return DataType.DOUBLE;
	}

	boolean fractional = (value.indexOf('.') >= 0);

	// The parse results are discarded; only the success or failure matters
	try {
		if(fractional){
			Double.parseDouble(value);
		} else {
			Long.parseLong(value);
		}
	} catch(NumberFormatException nfe){
		return DataType.STRING;
	}

	return (fractional ? DataType.DOUBLE : DataType.INTEGER);
}
 
Example 8
Source File: TypeUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Determines the common data type of a collection of values.
 *
 * <code>null</code> elements are ignored. The first non-<code>null</code> element sets the expected data type.
 * If no data type can be established (eg. an empty or all-<code>null</code> collection), the result is <code>DataType.STRING</code>.
 *
 * @throws TypeCheckException If an element's data type differs from the data type established so far.
 */
static
public DataType getDataType(Collection<?> values){
	DataType common = null;

	for(Object value : values){

		if(value == null){
			continue;
		}

		DataType current = getDataType(value);

		if(common == null){
			common = current;

			continue;
		} // End if

		if(!(common).equals(current)){
			throw new TypeCheckException(common, value);
		}
	}

	return (common != null ? common : DataType.STRING);
}
 
Example 9
Source File: ExpressionTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Translates the expression of this transformer into a derived field named <code>eval(&lt;expr&gt;)</code>,
 * and returns it as a single continuous feature.
 *
 * The data type of the derived field is the explicit dtype if there is one.
 * Otherwise it is STRING if the translator reports a string expression, and DOUBLE if not.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Object dtype = getDType();
	String expr = getExpr();

	// The expression sees the incoming features as a data frame called "X"
	Scope scope = new DataFrameScope(FieldName.create("X"), features);
	Expression expression = ExpressionTranslator.translate(expr, scope);

	DataType dataType;

	if(dtype != null){
		dataType = TransformerUtil.getDataType(dtype);
	} else if(ExpressionTranslator.isString(expression, scope)){
		dataType = DataType.STRING;
	} else {
		dataType = DataType.DOUBLE;
	}

	OpType opType = TransformerUtil.getOpType(dataType);
	FieldName name = FieldName.create("eval(" + expr + ")");

	DerivedField derivedField = encoder.createDerivedField(name, opType, dataType, expression);

	Feature feature = new ContinuousFeature(encoder, derivedField);

	return Collections.singletonList(feature);
}
 
Example 10
Source File: AppPMMLUtils.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code DataDictionary} with one {@code DataField} per feature of the schema.
 *
 * <p>Numeric features become continuous doubles and categorical features become categorical
 * strings. Features that are neither get a {@code null} op type and data type. For categorical
 * features, the field's values are added in ascending order of their encoding.</p>
 *
 * @param schema source of feature names and feature kinds
 * @param categoricalValueEncodings encodings of categorical values; must not be {@code null}
 *  if the schema has any categorical feature
 * @return dictionary whose number of fields equals the number of features
 */
public static DataDictionary buildDataDictionary(
    InputSchema schema,
    CategoricalValueEncodings categoricalValueEncodings) {
  List<String> names = schema.getFeatureNames();

  List<DataField> fields = new ArrayList<>();
  for (int index = 0; index < names.size(); index++) {
    String name = names.get(index);

    // Stay null when the feature is neither numeric nor categorical
    OpType opType = null;
    DataType dataType = null;
    if (schema.isNumeric(name)) {
      opType = OpType.CONTINUOUS;
      dataType = DataType.DOUBLE;
    } else if (schema.isCategorical(name)) {
      opType = OpType.CATEGORICAL;
      dataType = DataType.STRING;
    }

    DataField dataField = new DataField(FieldName.create(name), opType, dataType);
    if (schema.isCategorical(name)) {
      Objects.requireNonNull(categoricalValueEncodings);
      categoricalValueEncodings.getEncodingValueMap(index).entrySet().stream()
          .sorted(Map.Entry.comparingByKey())
          .map(Map.Entry::getValue)
          .forEach(category -> dataField.addValues(new Value(category)));
    }
    fields.add(dataField);
  }

  return new DataDictionary(fields).setNumberOfFields(fields.size());
}
 
Example 11
Source File: FieldValueTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Compares ordinal string values twice: first without an explicit ordering,
 * then with the explicit ordering "loud" &lt; "louder" &lt; "insane".
 */
@Test
public void ordinalString(){
	OrdinalValue implicitLoud = (OrdinalValue)FieldValueUtil.create(TypeInfos.ORDINAL_STRING, "loud");
	OrdinalValue implicitLouder = (OrdinalValue)FieldValueUtil.create(TypeInfos.ORDINAL_STRING, "louder");
	OrdinalValue implicitInsane = (OrdinalValue)FieldValueUtil.create(TypeInfos.ORDINAL_STRING, "insane");

	// Equality against raw values
	assertFalse(implicitLouder.equalsValue("loud"));
	assertTrue(implicitLouder.equalsValue("louder"));
	assertFalse(implicitLouder.equalsValue("insane"));

	// Equality against field values
	assertFalse(implicitLouder.equalsValue(implicitLoud));
	assertTrue(implicitLouder.equalsValue(implicitLouder));
	assertFalse(implicitLouder.equalsValue(implicitInsane));

	// No ordering attached; comparisons are lexicographic
	assertNull(implicitLoud.getOrdering());

	assertTrue(implicitLouder.compareToValue("loud") > 0);
	assertTrue(implicitLouder.compareToValue("louder") == 0);
	assertTrue(implicitLouder.compareToValue("insane") > 0);

	assertTrue(implicitLouder.compareTo(implicitLoud) > 0);
	assertTrue(implicitLouder.compareTo(implicitLouder) == 0);
	assertTrue(implicitLouder.compareTo(implicitInsane) > 0);

	TypeInfo orderedTypeInfo = new SimpleTypeInfo(DataType.STRING, OpType.ORDINAL, Arrays.asList("loud", "louder", "insane"));

	OrdinalValue explicitLoud = (OrdinalValue)FieldValueUtil.create(orderedTypeInfo, implicitLoud.getValue());
	OrdinalValue explicitLouder = (OrdinalValue)FieldValueUtil.create(orderedTypeInfo, implicitLouder.getValue());
	OrdinalValue explicitInsane = (OrdinalValue)FieldValueUtil.create(orderedTypeInfo, implicitInsane.getValue());

	// Ordering attached; "insane" now ranks above "louder"
	assertNotNull(explicitLoud.getOrdering());

	assertTrue(explicitLouder.compareToValue("loud") > 0);
	assertTrue(explicitLouder.compareToValue("louder") == 0);
	assertTrue(explicitLouder.compareToValue("insane") < 0);

	assertTrue(explicitLouder.compareTo(explicitLoud) > 0);
	assertTrue(explicitLouder.compareTo(explicitLouder) == 0);
	assertTrue(explicitLouder.compareTo(explicitInsane) < 0);
}
 
Example 12
Source File: OrdinalValue.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates an ordinal value whose data type is fixed to {@link DataType#STRING}.
 *
 * The ordering and the value are passed to the superclass constructor unchanged.
 */
OrdinalString(List<?> ordering, Object value){
	super(DataType.STRING, ordering, value);
}
 
Example 13
Source File: RDFPMMLUtilsTest.java    From oryx with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a small PMML document for tests.
 *
 * <p>The data dictionary has two categorical string fields: the predictor "color"
 * (values "yellow", "red") and the target "fruit" (values "banana", "apple").
 * The model is a single three-node {@code TreeModel} when {@code numTrees} is 1 or less.
 * Otherwise it is a {@code MiningModel} with {@code numTrees} segments, all of which
 * reference the same {@code TreeModel} instance.</p>
 *
 * @param numTrees number of segments to create; values of 1 or less yield a bare tree model
 * @return the populated PMML document
 */
private static PMML buildDummyClassificationModel(int numTrees) {
  PMML pmml = PMMLUtils.buildSkeletonPMML();

  // Data dictionary: one categorical predictor and one categorical target
  List<DataField> dataFields = new ArrayList<>();
  DataField predictor =
      new DataField(FieldName.create("color"), OpType.CATEGORICAL, DataType.STRING);
  predictor.addValues(new Value("yellow"), new Value("red"));
  dataFields.add(predictor);
  DataField target =
      new DataField(FieldName.create("fruit"), OpType.CATEGORICAL, DataType.STRING);
  target.addValues(new Value("banana"), new Value("apple"));
  dataFields.add(target);
  DataDictionary dataDictionary =
      new DataDictionary(dataFields).setNumberOfFields(dataFields.size());
  pmml.setDataDictionary(dataDictionary);

  // Mining schema: "color" is the active input, "fruit" is the predicted field
  List<MiningField> miningFields = new ArrayList<>();
  MiningField predictorMF = new MiningField(FieldName.create("color"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.ACTIVE)
      .setImportance(0.5);
  miningFields.add(predictorMF);
  MiningField targetMF = new MiningField(FieldName.create("fruit"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.PREDICTED);
  miningFields.add(targetMF);
  MiningSchema miningSchema = new MiningSchema(miningFields);

  // Root node carries the full record count and an always-true predicate
  double dummyCount = 2.0;
  Node rootNode =
    new ComplexNode().setId("r").setRecordCount(dummyCount).setPredicate(new True());

  // Each child gets half of the root's record count
  double halfCount = dummyCount / 2;

  // "r-" has an always-true predicate and scores "apple"
  Node left = new ComplexNode().setId("r-").setRecordCount(halfCount).setPredicate(new True());
  left.addScoreDistributions(new ScoreDistribution("apple", halfCount));
  // "r+" matches when color is NOT in {"red"} and scores "banana"
  Node right = new ComplexNode().setId("r+").setRecordCount(halfCount)
      .setPredicate(new SimpleSetPredicate(FieldName.create("color"),
                                           SimpleSetPredicate.BooleanOperator.IS_NOT_IN,
                                           new Array(Array.Type.STRING, "red")));
  right.addScoreDistributions(new ScoreDistribution("banana", halfCount));

  // Note the order: "r+" is added before "r-"
  rootNode.addNodes(right, left);

  TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, miningSchema, rootNode)
      .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
      .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);

  if (numTrees > 1) {
    // Forest: every segment has ID "0".."numTrees-1", weight 1.0 and shares the one treeModel
    MiningModel miningModel = new MiningModel(MiningFunction.CLASSIFICATION, miningSchema);
    List<Segment> segments = new ArrayList<>();
    for (int i = 0; i < numTrees; i++) {
      segments.add(new Segment()
          .setId(Integer.toString(i))
          .setPredicate(new True())
          .setModel(treeModel)
          .setWeight(1.0));
    }
    miningModel.setSegmentation(
        new Segmentation(Segmentation.MultipleModelMethod.WEIGHTED_MAJORITY_VOTE, segments));
    pmml.addModels(miningModel);
  } else {
    // Single tree: the tree model is the document's only model
    pmml.addModels(treeModel);
  }

  return pmml;
}
 
Example 14
Source File: RStringVector.java    From jpmml-r with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Reports the data type of this object, which is always {@link DataType#STRING}.
 */
@Override
public DataType getDataType(){
	return DataType.STRING;
}
 
Example 15
Source File: SubstringTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Reports the data type of this object, which is always {@link DataType#STRING}.
 */
@Override
public DataType getDataType(){
	return DataType.STRING;
}
 
Example 16
Source File: PatternTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Reports the data type of this object, which is always {@link DataType#STRING}.
 */
@Override
public DataType getDataType(){
	return DataType.STRING;
}
 
Example 17
Source File: CountVectorizer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Reports the data type of this object, which is always {@link DataType#STRING}.
 */
@Override
public DataType getDataType(){
	return DataType.STRING;
}
 
Example 18
Source File: CategoryEncoder.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Reports the data type of this object, which is always {@link DataType#STRING}.
 */
@Override
public DataType getDataType(){
	return DataType.STRING;
}
 
Example 19
Source File: CategoricalValue.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Creates a categorical value whose data type is fixed to {@link DataType#STRING}.
 *
 * The value is passed to the superclass constructor unchanged.
 */
CategoricalString(Object value){
	super(DataType.STRING, value);
}
 
Example 20
Source File: ValueParserTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Applies a strict-mode {@code ValueParser} to the same PMML document several times,
 * changing the data types of the data field "x1" and of the derived field between runs,
 * and checks how the values held by the {@code NormDiscrete}, {@code SimplePredicate}
 * and {@code SimpleSetPredicate} elements are typed after each run.
 *
 * Per the assertions, the NormDiscrete value and the set predicate's array follow the data type
 * of the data field, and the simple predicate's value follows the data type of the derived field.
 */
@Test
public void parseTreeModel(){
	// Fixture: data field "x1" and a derived field on top of it, both initially STRING
	DataField dataField = new DataField(FieldName.create("x1"), OpType.CATEGORICAL, DataType.STRING);

	DataDictionary dataDictionary = new DataDictionary()
		.addDataFields(dataField);

	// References the data field
	NormDiscrete normDiscrete = new NormDiscrete(dataField.getName(), "1");

	DerivedField derivedField = new DerivedField(FieldName.create("global(" + dataField.getName() + ")"), OpType.CATEGORICAL, DataType.STRING, normDiscrete);

	TransformationDictionary transformationDictionary = new TransformationDictionary()
		.addDerivedFields(derivedField);

	// References the derived field
	SimplePredicate simplePredicate = new SimplePredicate(derivedField.getName(), SimplePredicate.Operator.EQUAL, "1");

	Node child = new LeafNode("1", simplePredicate);

	// References the data field; the array literal holds the two tokens "0" and "1"
	SimpleSetPredicate simpleSetPredicate = new SimpleSetPredicate(dataField.getName(), SimpleSetPredicate.BooleanOperator.IS_IN, new Array(Array.Type.STRING, "0 1"));

	Node root = new BranchNode("0", simpleSetPredicate)
		.addNodes(child);

	MiningField miningField = new MiningField(dataField.getName());

	MiningSchema miningSchema = new MiningSchema()
		.addMiningFields(miningField);

	TreeModel treeModel = new TreeModel(MiningFunction.REGRESSION, miningSchema, null)
		.setNode(root);

	PMML pmml = new PMML(Version.PMML_4_3.getVersion(), new Header(), dataDictionary)
		.setTransformationDictionary(transformationDictionary)
		.addModels(treeModel);

	List<DataField> dataFields = dataDictionary.getDataFields();

	// Run 1: data field STRING, derived field STRING
	ValueParser parser = new ValueParser(ValueParser.Mode.STRICT);
	parser.applyTo(pmml);

	// Re-read the data field from the dictionary after every run
	// NOTE(review): presumably because the parser may replace list elements - confirm
	dataField = dataFields.get(0);

	assertEquals("1", normDiscrete.getValue());
	assertEquals("1", simplePredicate.getValue());

	Array array = simpleSetPredicate.getArray();

	assertEquals(ImmutableSet.of("0", "1"), array.getValue());

	// Run 2: data field INTEGER, derived field still STRING
	dataField.setDataType(DataType.INTEGER);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(1, normDiscrete.getValue());
	assertEquals("1", simplePredicate.getValue());

	array = simpleSetPredicate.getArray();

	assertTrue(array instanceof RichComplexArray);
	assertEquals(ImmutableSet.of(0, 1), array.getValue());

	// Run 3: data field DOUBLE, derived field INTEGER
	dataField.setDataType(DataType.DOUBLE);
	derivedField.setDataType(DataType.INTEGER);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(1.0d, normDiscrete.getValue());
	assertEquals(1, simplePredicate.getValue());

	array = simpleSetPredicate.getArray();

	assertEquals(ImmutableSet.of(0.0d, 1.0d), array.getValue());

	// Run 4: data field BOOLEAN, derived field DOUBLE
	dataField.setDataType(DataType.BOOLEAN);
	derivedField.setDataType(DataType.DOUBLE);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(true, normDiscrete.getValue());
	assertEquals(1.0d, simplePredicate.getValue());

	array = simpleSetPredicate.getArray();

	assertEquals(ImmutableSet.of(false, true), array.getValue());

	// Run 5: data field BOOLEAN, derived field BOOLEAN
	derivedField.setDataType(DataType.BOOLEAN);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(true, normDiscrete.getValue());
	assertEquals(true, simplePredicate.getValue());
}