Java Code Examples for org.dmg.pmml.OpType#CATEGORICAL

The following examples show how to use org.dmg.pmml.OpType#CATEGORICAL . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CategoricalValue.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
static
public FieldValue create(DataType dataType, Object value){

	if(value instanceof Collection){
		return new CollectionValue(dataType, OpType.CATEGORICAL, (Collection<?>)value);
	}

	switch(dataType){
		case STRING:
			return new CategoricalString(value);
		case BOOLEAN:
			return new CategoricalBoolean(value);
		default:
			return new CategoricalValue(dataType, value);
	}
}
 
Example 2
Source File: AppPMMLUtilsTest.java    From oryx with Apache License 2.0 6 votes vote down vote up
@Test
public void testBuildCategoricalEncoding() {
  List<DataField> dataFields = new ArrayList<>();
  dataFields.add(new DataField(FieldName.create("foo"), OpType.CONTINUOUS, DataType.DOUBLE));
  DataField barField =
      new DataField(FieldName.create("bar"), OpType.CATEGORICAL, DataType.STRING);
  barField.addValues(new Value("b"), new Value("a"));
  dataFields.add(barField);
  DataDictionary dictionary = new DataDictionary(dataFields).setNumberOfFields(dataFields.size());
  CategoricalValueEncodings encodings = AppPMMLUtils.buildCategoricalValueEncodings(dictionary);
  assertEquals(2, encodings.getValueCount(1));
  assertEquals(0, encodings.getValueEncodingMap(1).get("b").intValue());
  assertEquals(1, encodings.getValueEncodingMap(1).get("a").intValue());
  assertEquals("b", encodings.getEncodingValueMap(1).get(0));
  assertEquals("a", encodings.getEncodingValueMap(1).get(1));
  assertEquals(Collections.singletonMap(1, 2), encodings.getCategoryCounts());
}
 
Example 3
Source File: TransformerUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
static
public OpType getOpType(DataType dataType){

	switch(dataType){
		case STRING:
			return OpType.CATEGORICAL;
		case INTEGER:
		case FLOAT:
		case DOUBLE:
			return OpType.CONTINUOUS;
		case BOOLEAN:
			return OpType.CATEGORICAL;
		case DATE:
		case DATE_TIME:
			return OpType.ORDINAL;
		default:
			throw new IllegalArgumentException();
	}
}
 
Example 4
Source File: TypeUtil.java    From jpmml-tensorflow with GNU Affero General Public License v3.0 6 votes vote down vote up
static
public OpType getOpType(Output output){
	org.tensorflow.DataType dataType = output.dataType();

	switch(dataType){
		case FLOAT:
		case DOUBLE:
		case INT32:
		case INT64:
			return OpType.CONTINUOUS;
		case STRING:
		case BOOL:
			return OpType.CATEGORICAL;
		default:
			throw new IllegalArgumentException();
	}
}
 
Example 5
Source File: TypeUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
static
public OpType getOpType(DataType dataType){

	switch(dataType){
		case STRING:
			return OpType.CATEGORICAL;
		case INTEGER:
		case FLOAT:
		case DOUBLE:
			return OpType.CONTINUOUS;
		case BOOLEAN:
			return OpType.CATEGORICAL;
		case DATE:
		case TIME:
		case DATE_TIME:
			return OpType.ORDINAL;
		case DATE_DAYS_SINCE_0:
		case DATE_DAYS_SINCE_1960:
		case DATE_DAYS_SINCE_1970:
		case DATE_DAYS_SINCE_1980:
		case TIME_SECONDS:
		case DATE_TIME_SECONDS_SINCE_0:
		case DATE_TIME_SECONDS_SINCE_1960:
		case DATE_TIME_SECONDS_SINCE_1970:
		case DATE_TIME_SECONDS_SINCE_1980:
			return OpType.CONTINUOUS;
		default:
			throw new IllegalArgumentException();
	}
}
 
Example 6
Source File: AppPMMLUtils.java    From oryx with Apache License 2.0 5 votes vote down vote up
public static DataDictionary buildDataDictionary(
    InputSchema schema,
    CategoricalValueEncodings categoricalValueEncodings) {
  List<String> featureNames = schema.getFeatureNames();

  List<DataField> dataFields = new ArrayList<>();
  for (int featureIndex = 0; featureIndex < featureNames.size(); featureIndex++) {
    String featureName = featureNames.get(featureIndex);
    OpType opType;
    DataType dataType;
    if (schema.isNumeric(featureName)) {
      opType = OpType.CONTINUOUS;
      dataType = DataType.DOUBLE;
    } else if (schema.isCategorical(featureName)) {
      opType = OpType.CATEGORICAL;
      dataType = DataType.STRING;
    } else {
      // Don't know
      opType = null;
      dataType = null;
    }
    DataField field = new DataField(FieldName.create(featureName), opType, dataType);
    if (schema.isCategorical(featureName)) {
      Objects.requireNonNull(categoricalValueEncodings);
      categoricalValueEncodings.getEncodingValueMap(featureIndex).entrySet().stream().
          sorted(Comparator.comparing(Map.Entry::getKey)).
          map(Map.Entry::getValue).
          forEach(value -> field.addValues(new Value(value)));
    }
    dataFields.add(field);
  }

  return new DataDictionary(dataFields).setNumberOfFields(dataFields.size());
}
 
Example 7
Source File: PatternTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 8
Source File: CategoricalValue.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 9
Source File: RDFPMMLUtilsTest.java    From oryx with Apache License 2.0 4 votes vote down vote up
private static PMML buildDummyClassificationModel(int numTrees) {
  PMML pmml = PMMLUtils.buildSkeletonPMML();

  List<DataField> dataFields = new ArrayList<>();
  DataField predictor =
      new DataField(FieldName.create("color"), OpType.CATEGORICAL, DataType.STRING);
  predictor.addValues(new Value("yellow"), new Value("red"));
  dataFields.add(predictor);
  DataField target =
      new DataField(FieldName.create("fruit"), OpType.CATEGORICAL, DataType.STRING);
  target.addValues(new Value("banana"), new Value("apple"));
  dataFields.add(target);
  DataDictionary dataDictionary =
      new DataDictionary(dataFields).setNumberOfFields(dataFields.size());
  pmml.setDataDictionary(dataDictionary);

  List<MiningField> miningFields = new ArrayList<>();
  MiningField predictorMF = new MiningField(FieldName.create("color"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.ACTIVE)
      .setImportance(0.5);
  miningFields.add(predictorMF);
  MiningField targetMF = new MiningField(FieldName.create("fruit"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.PREDICTED);
  miningFields.add(targetMF);
  MiningSchema miningSchema = new MiningSchema(miningFields);

  double dummyCount = 2.0;
  Node rootNode =
    new ComplexNode().setId("r").setRecordCount(dummyCount).setPredicate(new True());

  double halfCount = dummyCount / 2;

  Node left = new ComplexNode().setId("r-").setRecordCount(halfCount).setPredicate(new True());
  left.addScoreDistributions(new ScoreDistribution("apple", halfCount));
  Node right = new ComplexNode().setId("r+").setRecordCount(halfCount)
      .setPredicate(new SimpleSetPredicate(FieldName.create("color"),
                                           SimpleSetPredicate.BooleanOperator.IS_NOT_IN,
                                           new Array(Array.Type.STRING, "red")));
  right.addScoreDistributions(new ScoreDistribution("banana", halfCount));

  rootNode.addNodes(right, left);

  TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, miningSchema, rootNode)
      .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
      .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);

  if (numTrees > 1) {
    MiningModel miningModel = new MiningModel(MiningFunction.CLASSIFICATION, miningSchema);
    List<Segment> segments = new ArrayList<>();
    for (int i = 0; i < numTrees; i++) {
      segments.add(new Segment()
          .setId(Integer.toString(i))
          .setPredicate(new True())
          .setModel(treeModel)
          .setWeight(1.0));
    }
    miningModel.setSegmentation(
        new Segmentation(Segmentation.MultipleModelMethod.WEIGHTED_MAJORITY_VOTE, segments));
    pmml.addModels(miningModel);
  } else {
    pmml.addModels(treeModel);
  }

  return pmml;
}
 
Example 10
Source File: SubstringTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 11
Source File: LookupTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 12
Source File: CategoricalDomain.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 13
Source File: LabelBinarizer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 14
Source File: OneHotEncoder.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 15
Source File: LabelEncoder.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 16
Source File: MultiOneHotEncoder.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 17
Source File: CountVectorizer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 18
Source File: CategoryEncoder.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public OpType getOpType(){
	return OpType.CATEGORICAL;
}
 
Example 19
Source File: ObjectMapperTest.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 2 votes vote down vote up
@Test
public void jsonClone() throws Exception {
	DataField dataField = new DataField(FieldName.create("x"), OpType.CATEGORICAL, DataType.BOOLEAN);

	DataDictionary dataDictionary = new DataDictionary()
		.addDataFields(dataField);

	MiningField miningField = new MiningField(FieldName.create("x"));

	MiningSchema miningSchema = new MiningSchema()
		.addMiningFields(miningField);

	assertSame(dataField.getName(), miningField.getName());

	SimplePredicate simplePredicate = new SimplePredicate(FieldName.create("x"), SimplePredicate.Operator.IS_NOT_MISSING, null);

	Node node = new ComplexNode(simplePredicate);

	TreeModel treeModel = new TreeModel()
		.setMiningSchema(miningSchema)
		.setNode(node);

	PMML pmml = new PMML()
		.setDataDictionary(dataDictionary)
		.addModels(treeModel);

	DirectByteArrayOutputStream buffer = new DirectByteArrayOutputStream(1024);

	JacksonUtil.writePMML(pmml, buffer);

	PMML jsonPmml;

	try(InputStream is = buffer.getInputStream()){
		jsonPmml = JacksonUtil.readPMML(is);
	}

	DataDictionary jsonDataDictionary = jsonPmml.getDataDictionary();

	List<DataField> jsonDataFields = jsonDataDictionary.getDataFields();

	assertEquals(1, jsonDataFields.size());

	DataField jsonDataField = jsonDataFields.get(0);

	assertEquals(dataField.getName(), jsonDataField.getName());
	assertEquals(dataField.getOpType(), jsonDataField.getOpType());
	assertEquals(dataField.getDataType(), jsonDataField.getDataType());

	List<Model> jsonModels = jsonPmml.getModels();

	assertEquals(1, jsonModels.size());

	TreeModel jsonTreeModel = (TreeModel)jsonModels.get(0);

	MiningSchema jsonMiningSchema = jsonTreeModel.getMiningSchema();

	List<MiningField> jsonMiningFields = jsonMiningSchema.getMiningFields();

	assertEquals(1, jsonMiningFields.size());

	MiningField jsonMiningField = jsonMiningFields.get(0);

	assertEquals(miningField.getName(), jsonMiningField.getName());
	assertEquals(miningField.getUsageType(), jsonMiningField.getUsageType());

	assertSame(jsonDataField.getName(), jsonMiningField.getName());

	Node jsonNode = jsonTreeModel.getNode();

	SimplePredicate jsonSimplePredicate = (SimplePredicate)jsonNode.getPredicate();

	assertEquals(simplePredicate.getField(), jsonSimplePredicate.getField());
	assertEquals(simplePredicate.getOperator(), jsonSimplePredicate.getOperator());

	assertSame(jsonDataField.getName(), jsonSimplePredicate.getField());
	assertSame(jsonMiningField.getName(), jsonSimplePredicate.getField());
}
 
Example 20
Source File: ValueParserTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 2 votes vote down vote up
@Test
public void parseTreeModel(){
	DataField dataField = new DataField(FieldName.create("x1"), OpType.CATEGORICAL, DataType.STRING);

	DataDictionary dataDictionary = new DataDictionary()
		.addDataFields(dataField);

	NormDiscrete normDiscrete = new NormDiscrete(dataField.getName(), "1");

	DerivedField derivedField = new DerivedField(FieldName.create("global(" + dataField.getName() + ")"), OpType.CATEGORICAL, DataType.STRING, normDiscrete);

	TransformationDictionary transformationDictionary = new TransformationDictionary()
		.addDerivedFields(derivedField);

	SimplePredicate simplePredicate = new SimplePredicate(derivedField.getName(), SimplePredicate.Operator.EQUAL, "1");

	Node child = new LeafNode("1", simplePredicate);

	SimpleSetPredicate simpleSetPredicate = new SimpleSetPredicate(dataField.getName(), SimpleSetPredicate.BooleanOperator.IS_IN, new Array(Array.Type.STRING, "0 1"));

	Node root = new BranchNode("0", simpleSetPredicate)
		.addNodes(child);

	MiningField miningField = new MiningField(dataField.getName());

	MiningSchema miningSchema = new MiningSchema()
		.addMiningFields(miningField);

	TreeModel treeModel = new TreeModel(MiningFunction.REGRESSION, miningSchema, null)
		.setNode(root);

	PMML pmml = new PMML(Version.PMML_4_3.getVersion(), new Header(), dataDictionary)
		.setTransformationDictionary(transformationDictionary)
		.addModels(treeModel);

	List<DataField> dataFields = dataDictionary.getDataFields();

	ValueParser parser = new ValueParser(ValueParser.Mode.STRICT);
	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals("1", normDiscrete.getValue());
	assertEquals("1", simplePredicate.getValue());

	Array array = simpleSetPredicate.getArray();

	assertEquals(ImmutableSet.of("0", "1"), array.getValue());

	dataField.setDataType(DataType.INTEGER);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(1, normDiscrete.getValue());
	assertEquals("1", simplePredicate.getValue());

	array = simpleSetPredicate.getArray();

	assertTrue(array instanceof RichComplexArray);
	assertEquals(ImmutableSet.of(0, 1), array.getValue());

	dataField.setDataType(DataType.DOUBLE);
	derivedField.setDataType(DataType.INTEGER);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(1.0d, normDiscrete.getValue());
	assertEquals(1, simplePredicate.getValue());

	array = simpleSetPredicate.getArray();

	assertEquals(ImmutableSet.of(0.0d, 1.0d), array.getValue());

	dataField.setDataType(DataType.BOOLEAN);
	derivedField.setDataType(DataType.DOUBLE);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(true, normDiscrete.getValue());
	assertEquals(1.0d, simplePredicate.getValue());

	array = simpleSetPredicate.getArray();

	assertEquals(ImmutableSet.of(false, true), array.getValue());

	derivedField.setDataType(DataType.BOOLEAN);

	parser.applyTo(pmml);

	dataField = dataFields.get(0);

	assertEquals(true, normDiscrete.getValue());
	assertEquals(true, simplePredicate.getValue());
}