org.dmg.pmml.DataField Java Examples

The following examples show how to use org.dmg.pmml.DataField. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ModelEvaluator.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Inspects every input field that is backed by a {@link DataField} element.
 * Input fields backed by any other kind of field are skipped.
 *
 * @return <code>false</code> as soon as <code>InputFieldUtil.isDefault</code> rejects a {@link DataField}-backed input field, <code>true</code> otherwise.
 */
protected boolean assessParentCompatibility(){

	for(InputField inputField : getInputFields()){
		Field<?> field = inputField.getField();
		MiningField miningField = inputField.getMiningField();

		// Only DataField-backed input fields take part in the check
		boolean incompatible = (field instanceof DataField) && !InputFieldUtil.isDefault(field, miningField);
		if(incompatible){
			return false;
		}
	}

	return true;
}
 
Example #2
Source File: Classification.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates a categorical target field and wraps it into a {@link CategoricalLabel}.
 *
 * If no target categories are supplied, they are generated from <code>num_class_</code> and the field gets the integer data type.
 * If target categories are supplied, they are used as-is and the field gets the string data type.
 *
 * @throws IllegalArgumentException If the number of supplied target categories differs from <code>num_class_</code>.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	DataType dataType;

	if(targetCategories != null){
		int size = targetCategories.size();

		if(size != this.num_class_){
			throw new IllegalArgumentException("Expected " + this.num_class_ + " target categories, got " + size + " target categories");
		}

		dataType = DataType.STRING;
	} else {
		targetCategories = LabelUtil.createTargetCategories(this.num_class_);

		dataType = DataType.INTEGER;
	}

	DataField dataField = encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories);

	return new CategoricalLabel(dataField);
}
 
Example #3
Source File: Classification.java    From jpmml-xgboost with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates a categorical target field and wraps it into a {@link CategoricalLabel}.
 *
 * If no target categories are supplied, they are generated from <code>num_class</code> and the field gets the integer data type.
 * If target categories are supplied, they are used as-is and the field gets the string data type.
 *
 * @throws IllegalArgumentException If the number of supplied target categories differs from <code>num_class</code>.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	DataType dataType;

	if(targetCategories != null){
		int size = targetCategories.size();

		if(size != this.num_class){
			throw new IllegalArgumentException("Expected " + this.num_class + " target categories, got " + size + " target categories");
		}

		dataType = DataType.STRING;
	} else {
		targetCategories = LabelUtil.createTargetCategories(this.num_class);

		dataType = DataType.INTEGER;
	}

	DataField dataField = encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories);

	return new CategoricalLabel(dataField);
}
 
Example #4
Source File: AppPMMLUtilsTest.java    From oryx with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-field dictionary (continuous "foo" at index 0, categorical "bar" at index 1
 * with values "b" then "a") and checks the encodings derived for the categorical field.
 */
@Test
public void testBuildCategoricalEncoding() {
  DataField fooField = new DataField(FieldName.create("foo"), OpType.CONTINUOUS, DataType.DOUBLE);
  DataField barField = new DataField(FieldName.create("bar"), OpType.CATEGORICAL, DataType.STRING);
  barField.addValues(new Value("b"), new Value("a"));

  List<DataField> fields = new ArrayList<>();
  fields.add(fooField);
  fields.add(barField);

  DataDictionary dictionary = new DataDictionary(fields).setNumberOfFields(fields.size());
  CategoricalValueEncodings result = AppPMMLUtils.buildCategoricalValueEncodings(dictionary);

  // Only field index 1 ("bar") carries values; encodings follow declaration order
  assertEquals(2, result.getValueCount(1));
  assertEquals(0, result.getValueEncodingMap(1).get("b").intValue());
  assertEquals(1, result.getValueEncodingMap(1).get("a").intValue());
  assertEquals("b", result.getEncodingValueMap(1).get(0));
  assertEquals("a", result.getEncodingValueMap(1).get(1));
  assertEquals(Collections.singletonMap(1, 2), result.getCategoryCounts());
}
 
Example #5
Source File: Transformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Updates the operational type and data type of a {@link DataField} element, unless the encoder reports the field as frozen.
 *
 * When the requested data type is double, but the element currently holds some other data type, then the current data type is retained.
 *
 * @return The same {@link DataField} element that was passed in.
 */
public DataField updateDataField(DataField dataField, OpType opType, DataType dataType, SkLearnEncoder encoder){

	if(encoder.isFrozen(dataField.getName())){
		return dataField;
	}

	switch(dataType){
		case DOUBLE:
			{
				// Double is treated as the default; an existing non-double data type wins over it
				DataType currentDataType = dataField.getDataType();

				if(currentDataType != DataType.DOUBLE){
					dataType = currentDataType;
				}
			}
			break;
		default:
			break;
	}

	dataField.setOpType(opType).setDataType(dataType);

	return dataField;
}
 
Example #6
Source File: ModelManager.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Returns the explicitly set default data field if there is one.
 * Otherwise picks one of the shared constants based on the mining function (and, for regression, the math context).
 *
 * @return A synthetic {@link DataField} element describing the default target field,
 * or <code>null</code> if the mining function is neither regression, classification nor clustering.
 */
public DataField getDefaultDataField(){
	DataField defaultDataField = this.defaultDataField;

	if(defaultDataField != null){
		return defaultDataField;
	}

	switch(getMiningFunction()){
		case REGRESSION:
			switch(getMathContext()){
				case FLOAT:
					return ModelManager.DEFAULT_TARGET_CONTINUOUS_FLOAT;
				default:
					return ModelManager.DEFAULT_TARGET_CONTINUOUS_DOUBLE;
			}
		case CLASSIFICATION:
		case CLUSTERING:
			return ModelManager.DEFAULT_TARGET_CATEGORICAL_STRING;
		default:
			return null;
	}
}
 
Example #7
Source File: FieldUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Collects the values of a {@link DataField} element whose property is "valid".
 *
 * @return A new list of valid values in declaration order; empty if the element declares no values.
 *
 * @throws MissingAttributeException If a <code>Value</code> child element has no value.
 */
static
private List<Object> parseCategories(DataField dataField){
	List<Object> categories = new ArrayList<>();

	if(!dataField.hasValues()){
		return categories;
	}

	for(Value pmmlValue : dataField.getValues()){
		Object simpleValue = pmmlValue.getValue();

		// The value is checked before the property, so a value-less element fails regardless of its property
		if(simpleValue == null){
			throw new MissingAttributeException(pmmlValue, PMMLAttributes.VALUE_VALUE);
		}

		switch(pmmlValue.getProperty()){
			case VALID:
				categories.add(simpleValue);
				break;
			default:
				break;
		}
	}

	return categories;
}
 
Example #8
Source File: MapHolderParser.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Replaces, in place, every {@link DataField} element that declares values with a {@link RichDataField} wrapper,
 * and then continues with the superclass visit.
 */
@Override
public VisitorAction visit(DataDictionary dataDictionary){

	if(dataDictionary.hasDataFields()){
		ListIterator<DataField> it = (dataDictionary.getDataFields()).listIterator();

		while(it.hasNext()){
			DataField dataField = it.next();

			if(!dataField.hasValues()){
				continue;
			}

			// ListIterator#set swaps the element that was just returned by next()
			it.set(new RichDataField(dataField));
		}
	}

	return super.visit(dataDictionary);
}
 
Example #9
Source File: MiningModelEvaluator.java    From jpmml-evaluator with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * @return <code>null</code> if the segmentation uses the "select first", "select all" or "model chain" multiple model method;
 * the superclass result otherwise.
 */
@Override
public DataField getDefaultDataField(){
	Segmentation segmentation = (getModel()).getSegmentation();

	Segmentation.MultipleModelMethod multipleModelMethod = segmentation.getMultipleModelMethod();

	switch(multipleModelMethod){
		case SELECT_FIRST:
		case SELECT_ALL:
		case MODEL_CHAIN:
			return null;
		default:
			break;
	}

	return super.getDefaultDataField();
}
 
Example #10
Source File: NearestNeighborModelEvaluator.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * @return <code>null</code> if the mining function is regression, classification or mixed;
 * the superclass result otherwise.
 */
@Override
public DataField getDefaultDataField(){

	switch(getMiningFunction()){
		case REGRESSION:
		case CLASSIFICATION:
		case MIXED:
			return null;
		default:
			break;
	}

	return super.getDefaultDataField();
}
 
Example #11
Source File: ModelEvaluationContext.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Prepares a user-supplied value for the named field.
 *
 * Active, group and order fields go through <code>InputFieldUtil.prepareInputValue</code>;
 * predicted and target fields go through <code>InputFieldUtil.prepareResidualInputValue</code>.
 *
 * @throws MissingFieldException If the model evaluator has no {@link DataField} element for the name.
 * @throws InvisibleFieldException If the model evaluator has no {@link MiningField} element for the name.
 * @throws UnsupportedAttributeException If the mining field has any other usage type.
 */
@Override
protected FieldValue prepare(FieldName name, Object value){
	ModelEvaluator<?> evaluator = getModelEvaluator();

	// The data field lookup comes first, so a completely unknown name is reported as missing rather than invisible
	DataField dataField = evaluator.getDataField(name);
	if(dataField == null){
		throw new MissingFieldException(name);
	}

	MiningField miningField = evaluator.getMiningField(name);
	if(miningField == null){
		throw new InvisibleFieldException(name);
	}

	MiningField.UsageType usageType = miningField.getUsageType();

	switch(usageType){
		case ACTIVE:
		case GROUP:
		case ORDER:
			return InputFieldUtil.prepareInputValue(dataField, miningField, value);
		case PREDICTED:
		case TARGET:
			return InputFieldUtil.prepareResidualInputValue(dataField, miningField, value);
		default:
			throw new UnsupportedAttributeException(miningField, usageType);
	}
}
 
Example #12
Source File: BinaryTreeConverter.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Walks a binary tree node and its "left" and "right" children recursively, stopping at terminal nodes.
 * For every split variable that the encoder does not know yet, creates a data field, registers it as a feature,
 * and records its position in <code>featureIndexes</code>.
 *
 * @throws IllegalArgumentException If the split point of a new variable is neither an integer nor a double vector.
 */
private void encodeVariableList(RGenericVector tree, RExpEncoder encoder){
	RBooleanVector terminal = tree.getBooleanElement("terminal");
	RGenericVector psplit = tree.getGenericElement("psplit");
	RGenericVector leftChild = tree.getGenericElement("left");
	RGenericVector rightChild = tree.getGenericElement("right");

	boolean isTerminal = (Boolean.TRUE).equals(terminal.asScalar());
	if(isTerminal){
		return;
	}

	RNumberVector<?> splitpoint = psplit.getNumericElement("splitpoint");
	RStringVector variableName = psplit.getStringElement("variableName");

	FieldName name = FieldName.create(variableName.asScalar());

	DataField dataField = encoder.getDataField(name);
	if(dataField == null){
		boolean categorical = (splitpoint instanceof RIntegerVector);

		if(!categorical && !(splitpoint instanceof RDoubleVector)){
			throw new IllegalArgumentException();
		} // End if

		if(categorical){
			// The category values are read from the "levels" attribute of the split point
			RStringVector levels = splitpoint.getStringAttribute("levels");

			dataField = encoder.createDataField(name, OpType.CATEGORICAL, null, levels.getValues());
		} else {
			dataField = encoder.createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
		}

		encoder.addFeature(dataField);

		// The index of a new feature equals the number of features registered so far
		int index = this.featureIndexes.size();

		this.featureIndexes.put(name, index);
	}

	encodeVariableList(leftChild, encoder);
	encodeVariableList(rightChild, encoder);
}
 
Example #13
Source File: Regression.java    From jpmml-xgboost with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a continuous target field with the float data type and wraps it into a {@link ContinuousLabel}.
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories. Must be <code>null</code> or empty, because a regression target has no categories.
 * @param encoder The encoder that creates the {@link DataField} element.
 *
 * @throws IllegalArgumentException If one or more target categories are supplied.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){

	// An empty list holds zero target categories, so it is as acceptable as null
	if(targetCategories != null && !targetCategories.isEmpty()){
		throw new IllegalArgumentException("Regression requires zero target categories");
	}

	DataField dataField = encoder.createDataField(targetField, OpType.CONTINUOUS, DataType.FLOAT);

	return new ContinuousLabel(dataField);
}
 
Example #14
Source File: SparkMLEncoder.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a {@link DataField} element for a schema column, deriving the operational type and data type from the Spark data type of the column:
 * string and boolean columns become categorical fields, integral and double columns become continuous fields.
 *
 * @throws IllegalArgumentException If the Spark data type of the column is none of the above.
 */
public DataField createDataField(FieldName name){
	StructField field = (getSchema()).apply(name.getValue());

	org.apache.spark.sql.types.DataType sparkDataType = field.dataType();

	if(sparkDataType instanceof StringType){
		return createDataField(name, OpType.CATEGORICAL, DataType.STRING);
	} // End if

	if(sparkDataType instanceof IntegralType){
		return createDataField(name, OpType.CONTINUOUS, DataType.INTEGER);
	} // End if

	if(sparkDataType instanceof DoubleType){
		return createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
	} // End if

	if(sparkDataType instanceof BooleanType){
		return createDataField(name, OpType.CATEGORICAL, DataType.BOOLEAN);
	}

	throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
}
 
Example #15
Source File: PMMLPipeline.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates one {@link DataField} element per active field name, all sharing the same operational type and data type,
 * and wraps each of them into a {@link WildcardFeature}.
 *
 * @return A new list of features, in the order of the active field names.
 */
static
private List<Feature> initFeatures(List<String> activeFields, OpType opType, DataType dataType, SkLearnEncoder encoder){
	List<Feature> features = new ArrayList<>();

	for(String activeField : activeFields){
		FieldName name = FieldName.create(activeField);

		DataField dataField = encoder.createDataField(name, opType, dataType);

		Feature feature = new WildcardFeature(encoder, dataField);

		features.add(feature);
	}

	return features;
}
 
Example #16
Source File: Regression.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a continuous target field with the double data type and wraps it into a {@link ContinuousLabel}.
 *
 * @throws IllegalArgumentException If one or more target categories are supplied.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	boolean noCategories = (targetCategories == null || targetCategories.isEmpty());

	if(!noCategories){
		throw new IllegalArgumentException("Regression requires zero target categories");
	}

	DataField dataField = encoder.createDataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE);

	return new ContinuousLabel(dataField);
}
 
Example #17
Source File: NaNAsMissingDecorator.java    From jpmml-lightgbm with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Declares the string "NaN" as a missing value on every {@link DataField} element whose data type is float or double,
 * and then continues with the superclass visit.
 */
@Override
public VisitorAction visit(DataField dataField){

	switch(dataField.getDataType()){
		case FLOAT:
		case DOUBLE:
			{
				PMMLUtil.addValues(dataField, Collections.singletonList("NaN"), Property.MISSING);
			}
			break;
		default:
			// Other data types are left untouched
			break;
	}

	return super.visit(dataField);
}
 
Example #18
Source File: IndexToStringConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a categorical string {@link DataField} element whose values are the labels of the transformer.
 *
 * @return A single-element list holding the {@link CategoricalFeature} for the new field.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString transformer = getTransformer();

	DataField dataField = encoder.createDataField(
		formatName(transformer),
		OpType.CATEGORICAL,
		DataType.STRING,
		Arrays.asList(transformer.getLabels())
	);

	Feature feature = new CategoricalFeature(encoder, dataField);

	return Collections.singletonList(feature);
}
 
Example #19
Source File: InputFieldUtilTest.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Empties the interval list and the value list of a {@link DataField} element, in that order.
 */
static
private void clearDomain(DataField dataField){
	(dataField.getIntervals()).clear();

	(dataField.getValues()).clear();
}
 
Example #20
Source File: CategoricalDomain.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes a wildcard feature as a categorical one.
 *
 * With values, the feature is converted via <code>toCategoricalFeature</code> using the standardized values.
 * Without values (<code>null</code> or empty), the underlying field is merely marked as categorical, and an {@link ObjectFeature} is returned for it.
 */
@Override
public Feature encode(WildcardFeature wildcardFeature, List<?> values){
	PMMLEncoder encoder = wildcardFeature.getEncoder();

	boolean hasValues = (values != null && !values.isEmpty());
	if(hasValues){
		List<?> standardizedValues = standardizeValues(wildcardFeature.getDataType(), values);

		return wildcardFeature.toCategoricalFeature(standardizedValues);
	}

	DataField dataField = (DataField)encoder.getField(wildcardFeature.getName());
	dataField.setOpType(OpType.CATEGORICAL);

	return new ObjectFeature(encoder, dataField.getName(), dataField.getDataType());
}
 
Example #21
Source File: GLMConverter.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the schema via the superclass, and then, for classification-type families,
 * converts the label to a categorical field using the factor levels of the label column of the "model" element.
 * Nothing is changed if the "model" element is absent.
 */
@Override
public void encodeSchema(RExpEncoder encoder){
	RGenericVector glm = getObject();

	RGenericVector family = glm.getGenericElement("family");
	// The second argument lets this lookup yield null; that case is handled below
	RGenericVector model = glm.getGenericElement("model", false);

	RStringVector familyFamily = family.getStringElement("family");

	super.encodeSchema(encoder);

	switch(getMiningFunction(familyFamily.asScalar())){
		case CLASSIFICATION:
			{
				Label label = encoder.getLabel();

				if(model == null){
					break;
				}

				String labelName = (label.getName()).getValue();

				RIntegerVector variable = model.getFactorElement(labelName);

				DataField dataField = (DataField)encoder.toCategorical(label.getName(), RExpUtil.getFactorLevels(variable));

				encoder.setLabel(dataField);
			}
			break;
		default:
			break;
	}
}
 
Example #22
Source File: KMeansConverter.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Registers one continuous double feature per name found in <code>dimnames(1)</code> of the "centers" element.
 */
@Override
public void encodeSchema(RExpEncoder encoder){
	RDoubleVector centers = (getObject()).getDoubleElement("centers");

	RStringVector columnNames = centers.dimnames(1);

	for(int column = 0; column < columnNames.size(); column++){
		FieldName name = FieldName.create(columnNames.getValue(column));

		DataField dataField = encoder.createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);

		encoder.addFeature(dataField);
	}
}
 
Example #23
Source File: XPathUtilTest.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks the XPath-like formatting of the {@link DataField} element, of its attribute and child element fields,
 * and of an attribute with and without a value.
 */
@Test
public void formatDataField() throws Exception {
	String element = XPathUtil.formatElement(DataField.class);
	assertEquals("DataField", element);

	String nameAttribute = XPathUtil.formatElementOrAttribute(PMMLAttributes.DATAFIELD_NAME);
	assertEquals("DataField@name", nameAttribute);

	String valueElements = XPathUtil.formatElementOrAttribute(PMMLElements.DATAFIELD_VALUES);
	assertEquals("DataField/Value", valueElements);

	String cyclicAttribute = XPathUtil.formatAttribute(PMMLAttributes.DATAFIELD_CYCLIC, null);
	assertEquals("DataField@isCyclic", cyclicAttribute);

	String cyclicAttributeWithValue = XPathUtil.formatAttribute(PMMLAttributes.DATAFIELD_CYCLIC, "0");
	assertEquals("DataField@isCyclic=0", cyclicAttributeWithValue);
}
 
Example #24
Source File: EnumUtilTest.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that the Java constant name ("ZERO") and the value ("0") of an enum constant are told apart.
 */
@Test
public void getEnumValue(){
	DataField.Cyclic cyclic = DataField.Cyclic.ZERO;

	String name = cyclic.name();
	String value = cyclic.value();

	assertEquals("ZERO", name);
	assertEquals("0", value);

	assertEquals("0", EnumUtil.getEnumValue(cyclic));
}
 
Example #25
Source File: AppPMMLUtils.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link DataDictionary} with one {@link DataField} per feature of the schema, in feature order.
 * Numeric features become continuous doubles, categorical features become categorical strings whose
 * values are listed in ascending encoding order, and any other feature gets no op type or data type.
 *
 * @param schema source of feature names and their numeric/categorical classification
 * @param categoricalValueEncodings per-feature encoding-to-value maps; must be non-null
 *  if the schema has any categorical feature
 * @return dictionary whose declared number of fields equals the number of fields it holds
 */
public static DataDictionary buildDataDictionary(
    InputSchema schema,
    CategoricalValueEncodings categoricalValueEncodings) {
  List<DataField> dataFields = new ArrayList<>();

  int featureIndex = 0;
  for (String featureName : schema.getFeatureNames()) {
    boolean numeric = schema.isNumeric(featureName);
    boolean categorical = schema.isCategorical(featureName);

    // Stay null when the feature is neither numeric nor categorical ("don't know")
    OpType opType = null;
    DataType dataType = null;
    if (numeric) {
      opType = OpType.CONTINUOUS;
      dataType = DataType.DOUBLE;
    } else if (categorical) {
      opType = OpType.CATEGORICAL;
      dataType = DataType.STRING;
    }

    DataField field = new DataField(FieldName.create(featureName), opType, dataType);
    if (categorical) {
      Objects.requireNonNull(categoricalValueEncodings);
      categoricalValueEncodings.getEncodingValueMap(featureIndex).entrySet().stream()
          .sorted(Map.Entry.comparingByKey())
          .forEach(entry -> field.addValues(new Value(entry.getValue())));
    }
    dataFields.add(field);

    featureIndex++;
  }

  DataDictionary dictionary = new DataDictionary(dataFields);
  return dictionary.setNumberOfFields(dataFields.size());
}
 
Example #26
Source File: AppPMMLUtils.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the name of every {@link DataField} in a dictionary.
 *
 * @param dictionary {@link DataDictionary} from model
 * @return names of features in order
 * @throws IllegalArgumentException if the dictionary's field list is null or empty
 *  (raised via {@code Preconditions.checkArgument})
 */
public static List<String> getFeatureNames(DataDictionary dictionary) {
  List<DataField> dataFields = dictionary.getDataFields();
  boolean hasFields = dataFields != null && !dataFields.isEmpty();
  Preconditions.checkArgument(hasFields, "No fields in DataDictionary");
  return dataFields.stream()
      .map(DataField::getName)
      .map(FieldName::getValue)
      .collect(Collectors.toList());
}
 
Example #27
Source File: AppPMMLUtils.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Builds {@link CategoricalValueEncodings} from the values declared on each {@link DataField}.
 * A field contributes an entry, keyed by its position in the dictionary, only if it declares
 * at least one value; the values are converted with {@code toString()} in declaration order.
 *
 * @param dictionary {@link DataDictionary} whose fields are inspected
 * @return encodings constructed from the position-to-values mapping
 */
public static CategoricalValueEncodings buildCategoricalValueEncodings(
    DataDictionary dictionary) {
  Map<Integer,Collection<String>> indexToValues = new HashMap<>();

  int featureIndex = 0;
  for (DataField field : dictionary.getDataFields()) {
    Collection<Value> values = field.getValues();
    boolean hasValues = values != null && !values.isEmpty();
    if (hasValues) {
      Collection<String> categoricalValues = values.stream()
          .map(Value::getValue)
          .map(Object::toString)
          .collect(Collectors.toList());
      indexToValues.put(featureIndex, categoricalValues);
    }
    featureIndex++;
  }

  return new CategoricalValueEncodings(indexToValues);
}
 
Example #28
Source File: AppPMMLUtilsTest.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts the name, op type and data type of a {@link DataField}.
 *
 * @param field field under test
 * @param name expected field name
 * @param categorical {@code true} to expect a categorical string, {@code false} to expect a
 *  continuous double, {@code null} to expect that neither op type nor data type is set
 */
private static void checkDataField(DataField field, String name, Boolean categorical) {
  assertEquals(name, field.getName().getValue());

  if (categorical == null) {
    assertNull(field.getOpType());
    assertNull(field.getDataType());
    return;
  }

  OpType expectedOpType = categorical ? OpType.CATEGORICAL : OpType.CONTINUOUS;
  DataType expectedDataType = categorical ? DataType.STRING : DataType.DOUBLE;
  assertEquals(expectedOpType, field.getOpType());
  assertEquals(expectedDataType, field.getDataType());
}
 
Example #29
Source File: MemoryMeasurerTest.java    From jpmml-model with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that two {@link DataField} elements with the same name and interval measure the same size,
 * whether their operational type and data type are <code>null</code> or set.
 */
@Test
public void measure(){
	Interval unitInterval = new Interval(Interval.Closure.CLOSED_CLOSED).setLeftMargin(0d).setRightMargin(1d);

	// Same name and interval, but without operational type and data type
	DataField untypedField = new DataField(FieldName.create("x"), null, null).addIntervals(unitInterval);

	DataField typedField = new DataField(FieldName.create("x"), OpType.CONTINUOUS, DataType.DOUBLE).addIntervals(unitInterval);

	assertEquals(getSize(untypedField), getSize(typedField));
}
 
Example #30
Source File: AbstractAppMLlibIT.java    From oryx with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that a {@link DataDictionary} agrees with an {@link InputSchema}: the declared and actual
 * number of fields both equal the schema's feature count, numeric features are continuous doubles,
 * categorical features are categorical strings, and all other features have neither op type
 * nor data type.
 *
 * @param schema schema that the dictionary is expected to describe
 * @param dataDictionary dictionary under test; must not be null
 */
protected static void checkDataDictionary(InputSchema schema, DataDictionary dataDictionary) {
  assertNotNull(dataDictionary);
  assertEquals("Wrong number of features",
               schema.getNumFeatures(),
               dataDictionary.getNumberOfFields().intValue());

  List<DataField> dataFields = dataDictionary.getDataFields();
  assertEquals(schema.getNumFeatures(), dataFields.size());

  for (DataField dataField : dataFields) {
    String featureName = dataField.getName().getValue();

    OpType expectedOpType;
    DataType expectedDataType;
    if (schema.isNumeric(featureName)) {
      expectedOpType = OpType.CONTINUOUS;
      expectedDataType = DataType.DOUBLE;
    } else if (schema.isCategorical(featureName)) {
      expectedOpType = OpType.CATEGORICAL;
      expectedDataType = DataType.STRING;
    } else {
      // Neither numeric nor categorical: nothing should have been set
      assertNull(dataField.getOpType());
      assertNull(dataField.getDataType());
      continue;
    }

    assertEquals("Wrong op type for feature " + featureName,
                 expectedOpType,
                 dataField.getOpType());
    assertEquals("Wrong data type for feature " + featureName,
                 expectedDataType,
                 dataField.getDataType());
  }
}