org.dmg.pmml.MissingValueTreatmentMethod Java Examples

The following examples show how to use org.dmg.pmml.MissingValueTreatmentMethod. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SimpleImputer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Translates an imputation strategy name into a missing value treatment method.
 *
 * @param strategy The strategy name; "constant", "mean", "median" and "most_frequent" are recognized.
 * @return The {@link MissingValueTreatmentMethod} constant that corresponds to the strategy name.
 * @throws IllegalArgumentException If the strategy name is not recognized. The offending name is the exception message.
 */
static
private MissingValueTreatmentMethod parseStrategy(String strategy){

	// The comparisons are invoked on the argument itself, so that a null argument fails with a NullPointerException
	if(strategy.equals("constant")){
		return MissingValueTreatmentMethod.AS_VALUE;
	} // End if

	if(strategy.equals("mean")){
		return MissingValueTreatmentMethod.AS_MEAN;
	} // End if

	if(strategy.equals("median")){
		return MissingValueTreatmentMethod.AS_MEDIAN;
	} // End if

	if(strategy.equals("most_frequent")){
		return MissingValueTreatmentMethod.AS_MODE;
	}

	throw new IllegalArgumentException(strategy);
}
 
Example #2
Source File: CategoricalImputer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the imputation of a single feature, using {@link MissingValueTreatmentMethod#AS_MODE}
 * as the missing value treatment and the fill value as the replacement value.
 *
 * @param features The input features; a size check against one is performed, and only the first element is read.
 * @param encoder The encoder that is handed over to {@code ImputerUtil.encodeFeature}.
 * @return A singleton list that holds the feature returned by {@code ImputerUtil.encodeFeature}.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Object replacementValue = getFill();
	Object configuredMissingValue = getMissingValues();

	ClassDictUtil.checkSize(1, features);

	// The "NaN" string marker is passed on as null
	Object missingValue = (("NaN").equals(configuredMissingValue) ? null : configuredMissingValue);

	Feature imputedFeature = ImputerUtil.encodeFeature(features.get(0), false, missingValue, replacementValue, MissingValueTreatmentMethod.AS_MODE, encoder);

	return Collections.singletonList(imputedFeature);
}
 
Example #3
Source File: DomainUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Translates a missing value treatment name into a missing value treatment method.
 *
 * @param missingValueTreatment The treatment name; may be {@code null}.
 * Recognized names are "as_is", "as_mean", "as_mode", "as_median", "as_value" and "return_invalid".
 * @return The corresponding {@link MissingValueTreatmentMethod} constant, or {@code null} if the name is {@code null}.
 * @throws IllegalArgumentException If the name is neither {@code null} nor recognized. The offending name is the exception message.
 */
static
public MissingValueTreatmentMethod parseMissingValueTreatment(String missingValueTreatment){
	MissingValueTreatmentMethod result;

	// A missing name stays missing; it is not an error
	if(missingValueTreatment == null){
		return null;
	}

	switch(missingValueTreatment){
		case "as_is":
			result = MissingValueTreatmentMethod.AS_IS;
			break;
		case "as_mean":
			result = MissingValueTreatmentMethod.AS_MEAN;
			break;
		case "as_mode":
			result = MissingValueTreatmentMethod.AS_MODE;
			break;
		case "as_median":
			result = MissingValueTreatmentMethod.AS_MEDIAN;
			break;
		case "as_value":
			result = MissingValueTreatmentMethod.AS_VALUE;
			break;
		case "return_invalid":
			result = MissingValueTreatmentMethod.RETURN_INVALID;
			break;
		default:
			throw new IllegalArgumentException(missingValueTreatment);
	}

	return result;
}
 
Example #4
Source File: ImputerModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Translates an imputation strategy name into a missing value treatment method.
 *
 * @param strategy The strategy name; "mean" and "median" are recognized.
 * @return {@link MissingValueTreatmentMethod#AS_MEAN} for "mean", {@link MissingValueTreatmentMethod#AS_MEDIAN} for "median".
 * @throws IllegalArgumentException If the strategy name is not recognized. The offending name is the exception message.
 */
static
public MissingValueTreatmentMethod parseStrategy(String strategy){

	// The comparisons are invoked on the argument itself, so that a null argument fails with a NullPointerException
	if(strategy.equals("mean")){
		return MissingValueTreatmentMethod.AS_MEAN;
	} // End if

	if(strategy.equals("median")){
		return MissingValueTreatmentMethod.AS_MEDIAN;
	}

	throw new IllegalArgumentException(strategy);
}
 
Example #5
Source File: SimpleImputer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the imputation of every input feature, pairing the feature at each position
 * with the statistic at the same position.
 *
 * <p>
 * When the indicator flag is set, then one indicator feature is additionally encoded per input feature.
 * The returned list holds all imputed features first, followed by all indicator features.
 * </p>
 *
 * @param features The input features.
 * @param encoder The encoder that is handed over to the {@code ImputerUtil} helper methods.
 * @return The imputed features, followed by the indicator features (if any).
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Boolean addIndicator = getAddIndicator();
	Object configuredMissingValue = getMissingValues();
	List<?> statistics = getStatistics();
	String strategy = getStrategy();

	ClassDictUtil.checkSize(features, statistics);

	// A Double NaN marker is passed on as null
	boolean nanMarker = (configuredMissingValue instanceof Double) && ((Double)configuredMissingValue).isNaN();
	Object missingValue = (nanMarker ? null : configuredMissingValue);

	MissingValueTreatmentMethod treatment = parseStrategy(strategy);

	List<Feature> imputedFeatures = new ArrayList<>();
	List<Feature> indicators = new ArrayList<>();

	for(int index = 0; index < features.size(); index++){
		Feature inputFeature = features.get(index);
		Object replacementValue = statistics.get(index);

		// The indicator is encoded from the original feature, before the imputation is encoded
		if(addIndicator){
			indicators.add(ImputerUtil.encodeIndicatorFeature(inputFeature, missingValue, encoder));
		}

		imputedFeatures.add(ImputerUtil.encodeFeature(inputFeature, addIndicator, missingValue, replacementValue, treatment, encoder));
	}

	// Indicator features trail the imputed features
	if(addIndicator){
		imputedFeatures.addAll(indicators);
	}

	return imputedFeatures;
}
 
Example #6
Source File: InputFieldUtil.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Resolves a missing input according to the missing value treatment of the mining field.
 *
 * <p>
 * A mining field without a missing value treatment is handled like {@link MissingValueTreatmentMethod#AS_IS}.
 * </p>
 *
 * @param typeInfo The type information; supplies the mining field and the field.
 * @return The result of {@code createMissingInputValue(typeInfo)} for the as-is, as-mean, as-mode, as-median and as-value treatments.
 * @throws MisplacedAttributeException If the treatment is return-invalid, but the mining field specifies a missing value replacement.
 * @throws InvalidResultException If the treatment is return-invalid, and the mining field does not specify a missing value replacement.
 * @throws UnsupportedAttributeException If the treatment is none of the above.
 */
static
private ScalarValue performMissingValueTreatment(InputTypeInfo typeInfo){
	MiningField miningField = typeInfo.getMiningField();

	MissingValueTreatmentMethod treatment = miningField.getMissingValueTreatment();
	if(treatment == null){
		treatment = MissingValueTreatmentMethod.AS_IS;
	} // End if

	// The return-invalid treatment never yields a value; it always fails
	if(treatment == MissingValueTreatmentMethod.RETURN_INVALID){
		Field<?> field = typeInfo.getField();

		// A replacement value contradicts the return-invalid treatment
		Object replacementValue = miningField.getMissingValueReplacement();
		if(replacementValue != null){
			throw new MisplacedAttributeException(miningField, PMMLAttributes.MININGFIELD_MISSINGVALUEREPLACEMENT, replacementValue);
		}

		throw new InvalidResultException("Field " + PMMLException.formatKey(field.getName()) + " requires user input value", miningField);
	}

	switch(treatment){
		case AS_IS:
		case AS_MEAN:
		case AS_MODE:
		case AS_MEDIAN:
		case AS_VALUE:
			return createMissingInputValue(typeInfo);
		default:
			throw new UnsupportedAttributeException(miningField, treatment);
	}
}
 
Example #7
Source File: ImputerModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the imputation of every input column of the transformer.
 *
 * <p>
 * The replacement value of each input column is read from the sole row of the surrogate dataset.
 * Every input column must resolve to a feature that is backed by a {@link DataField};
 * the imputation is attached to that data field as a {@link MissingValueDecorator}.
 * A missing value that is neither {@code null} nor NaN is additionally registered as a missing value of the data field.
 * </p>
 *
 * @param encoder The encoder that resolves input columns to features, and accepts decorators.
 * @return The features of the input columns, in the order of the input columns.
 * @throws IllegalArgumentException If the surrogate dataset does not collect to exactly one row,
 * or if a feature is not backed by a {@link DataField}.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	ImputerModel transformer = getTransformer();

	Double missingValue = transformer.getMissingValue();
	String strategy = transformer.getStrategy();
	Dataset<Row> surrogateDF = transformer.surrogateDF();

	MissingValueTreatmentMethod treatment = parseStrategy(strategy);

	List<Row> rows = surrogateDF.collectAsList();
	if(rows.size() != 1){
		throw new IllegalArgumentException();
	}

	Row surrogates = rows.get(0);

	InOutMode inputMode = getInputMode();

	List<Feature> encodedFeatures = new ArrayList<>();

	// Does not depend on the input column; evaluated once up front
	boolean registerMissingValue = (missingValue != null && !missingValue.isNaN());

	for(String inputCol : inputMode.getInputCols(transformer)){
		Feature feature = encoder.getOnlyFeature(inputCol);

		Field<?> field = feature.getField();

		// Decorators are only attached to data fields
		if(!(field instanceof DataField)){
			throw new IllegalArgumentException();
		}

		DataField dataField = (DataField)field;

		Object replacementValue = surrogates.getAs(inputCol);

		encoder.addDecorator(dataField, new MissingValueDecorator(treatment, replacementValue));

		if(registerMissingValue){
			PMMLUtil.addValues(dataField, Collections.singletonList(missingValue), Value.Property.MISSING);
		}

		encodedFeatures.add(feature);
	}

	return encodedFeatures;
}
 
Example #8
Source File: ImputerUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Encodes the imputation of a single feature.
 *
 * <p>
 * A feature that is backed by a {@link DataField} is imputed in place by attaching a {@link MissingValueDecorator}
 * to the data field, provided that the indicator flag is not set. In that case the feature itself is returned.
 * </p>
 *
 * <p>
 * Otherwise, a feature that is backed by a {@link DataField} or a {@link DerivedField} is imputed
 * by creating a derived field that selects between the replacement value and the original value.
 * In that case a new feature is returned, whose class depends on the data type of the derived field.
 * </p>
 *
 * @param feature The feature to impute.
 * @param addIndicator The indicator flag; only unboxed when the feature is backed by a {@link DataField}.
 * @param missingValue The value that stands for a missing value, or {@code null} to test for missingness proper.
 * @param replacementValue The value to substitute.
 * @param missingValueTreatmentMethod The treatment to record on the {@link MissingValueDecorator}.
 * @param encoder The encoder that accepts decorators and creates derived fields.
 * @return The imputed feature.
 * @throws IllegalArgumentException If the feature is backed by neither a {@link DataField} nor a {@link DerivedField}.
 */
static
public Feature encodeFeature(Feature feature, Boolean addIndicator, Object missingValue, Object replacementValue, MissingValueTreatmentMethod missingValueTreatmentMethod, SkLearnEncoder encoder){
	Field<?> field = feature.getField();

	// In-place imputation
	if(field instanceof DataField && !addIndicator){
		DataField dataField = (DataField)field;

		encoder.addDecorator(dataField, new MissingValueDecorator(missingValueTreatmentMethod, replacementValue));

		if(missingValue != null){
			PMMLUtil.addValues(dataField, Collections.singletonList(missingValue), Value.Property.MISSING);
		}

		return feature;
	} // End if

	if(!(field instanceof DataField) && !(field instanceof DerivedField)){
		throw new IllegalArgumentException();
	}

	// Imputation via a derived field
	Expression valueRef = feature.ref();

	// Either an equality test against the missing value marker, or a missingness test
	Expression condition = (missingValue != null ? PMMLUtil.createApply(PMMLFunctions.EQUAL, valueRef, PMMLUtil.createConstant(missingValue, feature.getDataType())) : PMMLUtil.createApply(PMMLFunctions.ISMISSING, valueRef));

	Expression imputation = PMMLUtil.createApply(PMMLFunctions.IF)
		.addExpressions(condition)
		.addExpressions(PMMLUtil.createConstant(replacementValue, feature.getDataType()), feature.ref());

	DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("imputer", feature), field.getOpType(), field.getDataType(), imputation);

	DataType derivedDataType = derivedField.getDataType();
	switch(derivedDataType){
		case INTEGER:
		case FLOAT:
		case DOUBLE:
			return new ContinuousFeature(encoder, derivedField);
		case STRING:
			return new StringFeature(encoder, derivedField);
		default:
			return new ObjectFeature(encoder, derivedField.getName(), derivedField.getDataType());
	}
}
 
Example #9
Source File: Domain.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Decorates the data field of every input feature with the missing value and invalid value
 * treatments of this domain.
 *
 * <p>
 * A replacement value without an explicit treatment implies a default treatment:
 * {@link MissingValueTreatmentMethod#AS_VALUE} for the missing value replacement,
 * and {@link InvalidValueTreatmentMethod#AS_IS} for the invalid value replacement.
 * </p>
 *
 * @param features The input features; each one is converted using {@code asWildcardFeature}.
 * @param encoder The encoder that accepts decorators.
 * @return The input features list, as received.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	MissingValueTreatmentMethod missingTreatment = DomainUtil.parseMissingValueTreatment(getMissingValueTreatment());
	Object missingReplacement = getMissingValueReplacement();
	List<?> missingValues = getMissingValues();

	if(missingReplacement != null && missingTreatment == null){
		missingTreatment = MissingValueTreatmentMethod.AS_VALUE;
	}

	InvalidValueTreatmentMethod invalidTreatment = DomainUtil.parseInvalidValueTreatment(getInvalidValueTreatment());
	Object invalidReplacement = getInvalidValueReplacement();

	if(invalidReplacement != null && invalidTreatment == null){
		invalidTreatment = InvalidValueTreatmentMethod.AS_IS;
	}

	for(Feature feature : features){
		DataField dataField = asWildcardFeature(feature).getField();

		// Replacement values are standardized against the data type of each data field separately
		DataType dataType = dataField.getDataType();

		if(missingTreatment != null){
			Object pmmlMissingReplacement = null;

			if(missingReplacement != null){
				pmmlMissingReplacement = standardizeValue(dataType, missingReplacement);
			}

			encoder.addDecorator(dataField, new MissingValueDecorator(missingTreatment, pmmlMissingReplacement));
		} // End if

		if(missingValues != null){
			PMMLUtil.addValues(dataField, standardizeValues(dataType, missingValues), Value.Property.MISSING);
		} // End if

		if(invalidTreatment != null){
			Object pmmlInvalidReplacement = null;

			if(invalidReplacement != null){
				pmmlInvalidReplacement = standardizeValue(dataType, invalidReplacement);
			}

			encoder.addDecorator(dataField, new InvalidValueDecorator(invalidTreatment, pmmlInvalidReplacement));
		}
	}

	return features;
}