org.jpmml.converter.WildcardFeature Java Examples

The following examples show how to use org.jpmml.converter.WildcardFeature. You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example #1
Source File: PMMLPipeline.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 5 votes
/**
 * Creates one wildcard feature per active field name.
 *
 * For every name, a data field is created on the encoder with the given
 * operational type and data type, and then wrapped into a {@link WildcardFeature}.
 *
 * @param activeFields The names of the active fields.
 * @param opType The operational type passed to the data field factory method.
 * @param dataType The data type passed to the data field factory method.
 * @param encoder The encoder that creates and owns the data fields.
 * @return The wildcard features, in the iteration order of <code>activeFields</code>.
 */
static
private List<Feature> initFeatures(List<String> activeFields, OpType opType, DataType dataType, SkLearnEncoder encoder){
	List<Feature> features = new ArrayList<>();

	for(String fieldName : activeFields){
		FieldName name = FieldName.create(fieldName);
		DataField field = encoder.createDataField(name, opType, dataType);

		Feature wildcard = new WildcardFeature(encoder, field);

		features.add(wildcard);
	}

	return features;
}
 
Example #2
Source File: DiscreteDomain.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 5 votes
/**
 * Encodes the single feature of this discrete domain.
 *
 * The features are first passed through the superclass implementation.
 * The resulting list is checked against the expected size of one, and its only element
 * must be a wildcard feature (see <code>asWildcardFeature</code>).
 * The wildcard feature is then handed to {@link #encode(WildcardFeature, List)},
 * together with the values of <code>getData()</code> if the "with data" flag is set,
 * or with an empty list otherwise.
 * If the "with statistics" flag is set, then univariate statistics (counts and discrete stats)
 * are registered with the encoder.
 *
 * @param features The input features.
 * @param encoder The encoder that receives the univariate statistics.
 * @return A singleton list holding the encoded feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<Feature> baseFeatures = super.encodeFeatures(features, encoder);

	Boolean withData = getWithData();
	Boolean withStatistics = getWithStatistics();

	ClassDictUtil.checkSize(1, baseFeatures);

	WildcardFeature wildcardFeature = asWildcardFeature(baseFeatures.get(0));

	// Choose the category values first, so that there is a single encode call site
	List<?> values;

	if(withData){
		values = getData();
	} else {
		values = Collections.emptyList();
	}

	Feature encodedFeature = encode(wildcardFeature, values);

	if(!withStatistics){
		return Collections.singletonList(encodedFeature);
	}

	Map<String, ?> countsMap = extractMap(getCounts(), 0);
	Object[] discrStatsArray = getDiscrStats();

	UnivariateStats stats = new UnivariateStats()
		.setField(wildcardFeature.getName())
		.setCounts(createCounts(countsMap))
		.setDiscrStats(createDiscrStats(wildcardFeature.getDataType(), discrStatsArray));

	encoder.putUnivariateStats(stats);

	return Collections.singletonList(encodedFeature);
}
 
Example #3
Source File: CategoricalDomain.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 5 votes
/**
 * Encodes a wildcard feature as a categorical feature.
 *
 * If there are values, then they are standardized against the data type of the feature,
 * and the feature is converted using {@link WildcardFeature#toCategoricalFeature(List)}.
 * If the values are <code>null</code> or empty, then the underlying data field is marked as categorical,
 * and an {@link ObjectFeature} with the same name and data type is returned instead.
 *
 * @param wildcardFeature The feature to encode.
 * @param values The category values. May be <code>null</code> or empty.
 * @return The encoded feature.
 */
@Override
public Feature encode(WildcardFeature wildcardFeature, List<?> values){
	PMMLEncoder encoder = wildcardFeature.getEncoder();

	boolean hasValues = (values != null && !values.isEmpty());

	if(hasValues){
		List<?> pmmlValues = standardizeValues(wildcardFeature.getDataType(), values);

		return wildcardFeature.toCategoricalFeature(pmmlValues);
	}

	// No values to enumerate; only fix the operational type of the field
	DataField field = (DataField)encoder.getField(wildcardFeature.getName());

	field.setOpType(OpType.CATEGORICAL);

	return new ObjectFeature(encoder, field.getName(), field.getDataType());
}
 
Example #4
Source File: Domain.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 5 votes
/**
 * Narrows a feature to a wildcard feature.
 *
 * @param feature The feature to narrow.
 * @return The same object, typed as {@link WildcardFeature}.
 * @throws IllegalArgumentException If the feature is not an instance of {@link WildcardFeature}.
 */
static
protected WildcardFeature asWildcardFeature(Feature feature){

	if(!(feature instanceof WildcardFeature)){
		throw new IllegalArgumentException("Field " + feature.getName() + " is not decorable");
	}

	return (WildcardFeature)feature;
}
 
Example #5
Source File: MultiOneHotEncoder.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Expands every input feature into a sequence of binary indicator features.
 *
 * The list of per-feature categories (<code>getCategories()</code>) is checked against the list of features
 * using <code>ClassDictUtil.checkSize</code>. Each feature is then handled according to its type:
 * <ul>
 *   <li>{@link CategoricalFeature} - its own values replace the categories of this transformer (after a size check).</li>
 *   <li>{@link ObjectFeature} - used as-is, together with the categories of this transformer.</li>
 *   <li>{@link WildcardFeature} - converted to a categorical feature over the categories of this transformer.</li>
 * </ul>
 * If the "drop" attribute is set, then the category at the per-feature drop index is removed
 * before the binary features are generated.
 *
 * @param features The input features.
 * @param encoder The encoder that is passed on to the generated {@link BinaryFeature} objects.
 * @return The binary features of all input features, concatenated in input order.
 * @throws IllegalArgumentException If a feature is of none of the supported types.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<List<?>> categories = getCategories();

	ClassDictUtil.checkSize(categories, features);

	Object drop = getDrop();
	List<Integer> dropIdx = (drop != null ? getDropIdx() : null);

	List<Feature> result = new ArrayList<>();

	for(int i = 0; i < features.size(); i++){
		Feature feature = features.get(i);
		List<?> featureCategories = categories.get(i);

		if(feature instanceof CategoricalFeature){
			CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

			ClassDictUtil.checkSize(featureCategories, categoricalFeature.getValues());

			featureCategories = categoricalFeature.getValues();
		} else

		if(feature instanceof ObjectFeature){
			// Nothing to convert: the feature is used as-is with the categories of this transformer
		} else

		if(feature instanceof WildcardFeature){
			WildcardFeature wildcardFeature = (WildcardFeature)feature;

			feature = wildcardFeature.toCategoricalFeature(featureCategories);
		} else

		{
			throw new IllegalArgumentException("Feature at index " + i + " is not a categorical, object or wildcard feature");
		} // End if

		if(dropIdx != null){
			// Unbox to primitive value in order to ensure correct List#remove(int) vs. List#remove(Object) method resolution
			int index = dropIdx.get(i);

			// Work on a copy, so that the category list of the transformer (or of the feature) is left intact
			featureCategories = new ArrayList<>(featureCategories);
			featureCategories.remove(index);
		}

		for(Object featureCategory : featureCategories){
			result.add(new BinaryFeature(encoder, feature, featureCategory));
		}
	}

	return result;
}
 
Example #6
Source File: DataFrameMapper.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Builds the initial features of this mapper, one mapping row at a time.
 *
 * For every row:
 * <ol>
 *   <li>Each column name is resolved to a data field (looked up on the encoder, and created if absent), and wrapped into a {@link WildcardFeature}.</li>
 *   <li>The row features are passed through the transformers of the row, in order.</li>
 *   <li>If the row has a third element, then it is treated as an options map; a non-<code>null</code> "alias" option
 *   renames the row features (with an <code>_index</code> suffix if there is more than one row feature).</li>
 * </ol>
 *
 * @param encoder The encoder that owns the data fields and performs the renaming.
 * @return The features of all rows, concatenated in row order.
 * @throws IllegalArgumentException If the "default" attribute is anything other than <code>Boolean.FALSE</code>.
 */
@Override
public List<Feature> initializeFeatures(SkLearnEncoder encoder){
	Object defaultValue = getDefault();
	List<Object[]> mappings = getFeatures();

	boolean defaultDisabled = (Boolean.FALSE).equals(defaultValue);

	if(!defaultDisabled){
		throw new IllegalArgumentException("Attribute \'" + ClassDictUtil.formatMember(this, "default") + "\' must be set to the 'False' value");
	}

	List<Feature> features = new ArrayList<>();

	for(Object[] mapping : mappings){
		List<Feature> mappingFeatures = new ArrayList<>();

		List<String> columnNames = getColumnList(mapping);
		for(String columnName : columnNames){
			FieldName name = FieldName.create(columnName);

			// Reuse the data field if some earlier row has already declared it
			DataField field = encoder.getDataField(name);
			if(field == null){
				field = encoder.createDataField(name);
			}

			mappingFeatures.add(new WildcardFeature(encoder, field));
		}

		List<Transformer> steps = getTransformerList(mapping);
		for(Transformer step : steps){
			mappingFeatures = step.updateAndEncodeFeatures(mappingFeatures, encoder);
		}

		if(mapping.length > 2){
			Map<String, ?> options = (Map)mapping[2];

			String alias = (String)options.get("alias");
			if(alias != null){

				for(int index = 0; index < mappingFeatures.size(); index++){
					Feature mappingFeature = mappingFeatures.get(index);

					FieldName aliasName;

					if(mappingFeatures.size() > 1){
						aliasName = FieldName.create(alias + "_" + index);
					} else {
						aliasName = FieldName.create(alias);
					}

					encoder.renameFeature(mappingFeature, aliasName);
				}
			}
		}

		features.addAll(mappingFeatures);
	}

	return features;
}
 
Example #7
Source File: DiscreteDomain.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Encodes a wildcard feature over the given values.
 *
 * @param wildcardFeature The feature to encode.
 * @param values The values of the feature.
 * @return The encoded feature.
 */
public abstract Feature encode(WildcardFeature wildcardFeature, List<?> values);
 
Example #8
Source File: Domain.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Decorates the data fields of the features with missing and invalid value handling.
 *
 * A missing value replacement without an explicit treatment falls back to
 * {@link MissingValueTreatmentMethod#AS_VALUE}; an invalid value replacement without an explicit treatment
 * falls back to {@link InvalidValueTreatmentMethod#AS_IS}.
 * Every feature must be a wildcard feature (see <code>asWildcardFeature</code>).
 * Replacement values and missing values are standardized against the data type of the data field
 * before they are attached.
 *
 * @param features The features whose data fields are decorated.
 * @param encoder The encoder that collects the decorators.
 * @return The input list, unchanged.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	MissingValueTreatmentMethod missingTreatment = DomainUtil.parseMissingValueTreatment(getMissingValueTreatment());
	Object missingReplacement = getMissingValueReplacement();
	List<?> missingValues = getMissingValues();

	if(missingReplacement != null && missingTreatment == null){
		missingTreatment = MissingValueTreatmentMethod.AS_VALUE;
	}

	InvalidValueTreatmentMethod invalidTreatment = DomainUtil.parseInvalidValueTreatment(getInvalidValueTreatment());
	Object invalidReplacement = getInvalidValueReplacement();

	if(invalidReplacement != null && invalidTreatment == null){
		invalidTreatment = InvalidValueTreatmentMethod.AS_IS;
	}

	for(Feature feature : features){
		DataField field = asWildcardFeature(feature).getField();

		DataType fieldType = field.getDataType();

		if(missingTreatment != null){
			Object pmmlReplacement = null;

			if(missingReplacement != null){
				pmmlReplacement = standardizeValue(fieldType, missingReplacement);
			}

			encoder.addDecorator(field, new MissingValueDecorator(missingTreatment, pmmlReplacement));
		}

		if(missingValues != null){
			PMMLUtil.addValues(field, standardizeValues(fieldType, missingValues), Value.Property.MISSING);
		}

		if(invalidTreatment != null){
			Object pmmlReplacement = null;

			if(invalidReplacement != null){
				pmmlReplacement = standardizeValue(fieldType, invalidReplacement);
			}

			encoder.addDecorator(field, new InvalidValueDecorator(invalidTreatment, pmmlReplacement));
		}
	}

	return features;
}
 
Example #9
Source File: ContinuousDomain.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Encodes the features of this continuous domain.
 *
 * The features are first passed through the superclass implementation.
 * Every feature must be a wildcard feature (see <code>asWildcardFeature</code>). Per feature:
 * <ul>
 *   <li>If an outlier treatment is configured, then an {@link OutlierDecorator} is added to the data field.
 *   The low and high values are only read for the "as extreme values" and "as missing values" treatments; they are <code>null</code> otherwise.</li>
 *   <li>If the "with data" flag is set, then a closed interval between the i-th data minimum and data maximum
 *   is added to the data field, and the feature is converted to a continuous feature.</li>
 *   <li>If the "with statistics" flag is set, then univariate statistics (counts and numeric info) are registered with the encoder.</li>
 * </ul>
 *
 * @param features The input features.
 * @param encoder The encoder that collects the decorators and the univariate statistics.
 * @return The encoded features, in input order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	features = super.encodeFeatures(features, encoder);

	OutlierTreatmentMethod outlierTreatment = DomainUtil.parseOutlierTreatment(getOutlierTreatment());

	// The limits stay unset unless the treatment actually makes use of them
	Number lowValue = null;
	Number highValue = null;

	if(outlierTreatment == OutlierTreatmentMethod.AS_EXTREME_VALUES || outlierTreatment == OutlierTreatmentMethod.AS_MISSING_VALUES){
		lowValue = getLowValue();
		highValue = getHighValue();
	}

	Boolean withData = getWithData();
	Boolean withStatistics = getWithStatistics();

	List<? extends Number> minima = null;
	List<? extends Number> maxima = null;

	if(withData){
		minima = getDataMin();
		maxima = getDataMax();

		ClassDictUtil.checkSize(features, minima, maxima);
	}

	List<Feature> encodedFeatures = new ArrayList<>();

	for(int index = 0; index < features.size(); index++){
		Feature encodedFeature = features.get(index);

		WildcardFeature wildcardFeature = asWildcardFeature(encodedFeature);

		DataField field = wildcardFeature.getField();

		if(outlierTreatment != null){
			encoder.addDecorator(field, new OutlierDecorator(outlierTreatment, lowValue, highValue));
		}

		if(withData){
			Interval valueRange = new Interval(Interval.Closure.CLOSED_CLOSED)
				.setLeftMargin(minima.get(index))
				.setRightMargin(maxima.get(index));

			field.addIntervals(valueRange);

			encodedFeature = wildcardFeature.toContinuousFeature();
		}

		if(withStatistics){
			Map<String, ?> countsMap = extractMap(getCounts(), index);
			Map<String, ?> numericInfoMap = extractMap(getNumericInfo(), index);

			UnivariateStats stats = new UnivariateStats()
				.setField(field.getName())
				.setCounts(createCounts(countsMap))
				.setNumericInfo(createNumericInfo(wildcardFeature.getDataType(), numericInfoMap));

			encoder.putUnivariateStats(stats);
		}

		encodedFeatures.add(encodedFeature);
	}

	return encodedFeatures;
}
 
Example #10
Source File: TemporalDomain.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Encodes the features of this temporal domain.
 *
 * The features are first passed through the superclass implementation.
 * Every feature must be a wildcard feature (see <code>asWildcardFeature</code>);
 * its data field is marked as ordinal, and the feature is replaced with an {@link ObjectFeature}
 * that has the name and the data type of the data field.
 *
 * @param features The input features.
 * @param encoder The encoder of the generated object features.
 * @return The object features, in input order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<Feature> baseFeatures = super.encodeFeatures(features, encoder);

	List<Feature> objectFeatures = new ArrayList<>();

	for(Feature baseFeature : baseFeatures){
		DataField field = asWildcardFeature(baseFeature).getField();

		field.setOpType(OpType.ORDINAL);

		objectFeatures.add(new ObjectFeature(encoder, field.getName(), field.getDataType()));
	}

	return objectFeatures;
}
 
Example #11
Source File: ScalerTest.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Asserts that a transformer replaces a wildcard feature over field "x" with a new feature,
 * and that the field of the new feature is a derived field whose expression
 * is an {@link Apply} element of the expected function.
 *
 * @param transformer The transformer under test.
 * @param function The expected value of <code>Apply#getFunction()</code>.
 */
void assertTransformedFeature(Transformer transformer, String function){
	SkLearnEncoder encoder = new SkLearnEncoder();

	DataField xField = encoder.createDataField(FieldName.create("x"));

	Feature input = new WildcardFeature(encoder, xField);
	Feature output = Iterables.getOnlyElement(transformer.encodeFeatures(Collections.singletonList(input), encoder));

	assertNotSame(input, output);

	// The output feature must be backed by a derived field that applies the function
	DerivedField outputField = (DerivedField)encoder.getField(output.getName());

	Apply expression = (Apply)outputField.getExpression();

	assertEquals(function, expression.getFunction());
}
 
Example #12
Source File: OneHotEncoderTest.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 4 votes
/**
 * Checks that a one-hot encoder with <code>n_values_</code> set to three turns a wildcard feature
 * over a categorical integer field into binary features of values 0, 1 and 2,
 * and that the data field goes from having no values to having the values 0, 1 and 2.
 */
@Test
public void encode(){
	SkLearnEncoder encoder = new SkLearnEncoder();

	DataField xField = encoder.createDataField(FieldName.create("x"), OpType.CATEGORICAL, DataType.INTEGER);

	Feature input = new WildcardFeature(encoder, xField);

	// Precondition: the field has no declared values
	assertEquals(Arrays.asList(), PMMLUtil.getValues(xField));

	OneHotEncoder transformer = new OneHotEncoder("sklearn.preprocessing.data", "OneHotEncoder");
	transformer.put("n_values_", 3);

	List<Feature> outputs = transformer.encodeFeatures(Collections.singletonList(input), encoder);
	for(int value = 0; value < 3; value++){
		BinaryFeature output = (BinaryFeature)outputs.get(value);

		assertEquals(value, output.getValue());
	}

	// Postcondition: the field has picked up the encoded values
	assertEquals(Arrays.asList(0, 1, 2), PMMLUtil.getValues(xField));
}
 
Example #13
Source File: ScalerTest.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 3 votes
/**
 * Asserts that a transformer passes a wildcard feature over field "x" through untouched,
 * i.e. the only output feature is the very same object as the input feature.
 *
 * @param transformer The transformer under test.
 */
void assertSameFeature(Transformer transformer){
	SkLearnEncoder encoder = new SkLearnEncoder();

	DataField xField = encoder.createDataField(FieldName.create("x"));

	Feature input = new WildcardFeature(encoder, xField);
	Feature output = Iterables.getOnlyElement(transformer.encodeFeatures(Collections.singletonList(input), encoder));

	assertSame(input, output);
}
 
Example #14
Source File: OneHotEncoder.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 2 votes
/**
 * Expands the single input feature into binary indicator features.
 *
 * The feature list is checked against the expected size of one. The feature is then handled according to its type:
 * <ul>
 *   <li>{@link CategoricalFeature} - its values are size-checked against the values of this transformer,
 *   and one binary feature is generated per position, using the value of the feature at that position.</li>
 *   <li>{@link WildcardFeature} - one binary feature is generated per value of this transformer (converted to an integer category);
 *   afterwards <code>toCategoricalFeature</code> is invoked on the wildcard feature with all the categories, and its result is discarded.</li>
 * </ul>
 *
 * @param features The input features.
 * @param encoder The encoder that is passed on to the generated {@link BinaryFeature} objects.
 * @return The binary features.
 * @throws IllegalArgumentException If the feature is of neither of the supported types.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<? extends Number> values = getValues();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	List<Feature> binaryFeatures = new ArrayList<>();

	if(feature instanceof CategoricalFeature){
		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		ClassDictUtil.checkSize(values, categoricalFeature.getValues());

		for(int index = 0; index < values.size(); index++){
			binaryFeatures.add(new BinaryFeature(encoder, categoricalFeature, categoricalFeature.getValue(index)));
		}

		return binaryFeatures;
	}

	if(feature instanceof WildcardFeature){
		WildcardFeature wildcardFeature = (WildcardFeature)feature;

		List<Integer> categories = new ArrayList<>();

		for(Number value : values){
			Integer category = ValueUtil.asInt(value);

			categories.add(category);

			binaryFeatures.add(new BinaryFeature(encoder, wildcardFeature, category));
		}

		// Invoked after the loop, with the complete list of categories; the returned feature is not used
		wildcardFeature.toCategoricalFeature(categories);

		return binaryFeatures;
	}

	throw new IllegalArgumentException();
}
 
Example #15
Source File: ImputerTest.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 2 votes
/**
 * Checks the encoding of a "most frequent" imputer over field "x" in two scenarios:
 * <ol>
 *   <li>Imputer alone - the data field exists, there is no "imputer(x)" derived field, the data field has one decorator,
 *   and the feature is still a {@link WildcardFeature} named "x".</li>
 *   <li>Categorical domain followed by the imputer - same field expectations, but there are two decorators,
 *   and the feature is a {@link CategoricalFeature} named "x".</li>
 * </ol>
 */
@Test
public void encodeCategorical(){
	FieldName name = FieldName.create("x");
	FieldName imputedName = FieldName.create("imputer(x)");

	Imputer imputer = new Imputer("sklearn.preprocessing.imputation", "Imputer");
	imputer.put("strategy", "most_frequent");
	imputer.put("missing_values", "NaN");
	imputer.put("statistics_", 0);

	// Scenario 1: imputer alone
	SkLearnEncoder imputerEncoder = new SkLearnEncoder();

	Feature imputerFeature = encodeFeature(name.getValue(), Arrays.asList(imputer), imputerEncoder);

	assertNotNull(imputerEncoder.getDataField(name));
	assertNull(imputerEncoder.getDerivedField(imputedName));

	List<Decorator> imputerDecorators = imputerEncoder.getDecorators(name);

	assertEquals(1, imputerDecorators.size());

	assertTrue(imputerFeature instanceof WildcardFeature);
	assertEquals(name, imputerFeature.getName());

	// Scenario 2: categorical domain, followed by the imputer
	NDArray domainData = new NDArray();
	domainData.put("data", Arrays.asList(0, 1, 2, 3, 4, 5, 6));
	domainData.put("fortran_order", Boolean.FALSE);

	CategoricalDomain categoricalDomain = new CategoricalDomain("sklearn2pmml.decoration", "CategoricalDomain");
	categoricalDomain.put("invalid_value_treatment", "as_is");
	categoricalDomain.put("data_", domainData);

	SkLearnEncoder domainEncoder = new SkLearnEncoder();

	Feature domainFeature = encodeFeature(name.getValue(), Arrays.asList(categoricalDomain, imputer), domainEncoder);

	assertNotNull(domainEncoder.getDataField(name));
	assertNull(domainEncoder.getDerivedField(imputedName));

	List<Decorator> domainDecorators = domainEncoder.getDecorators(name);

	assertEquals(2, domainDecorators.size());

	assertTrue(domainFeature instanceof CategoricalFeature);
	assertEquals(name, domainFeature.getName());
}
 
Example #16
Source File: ImputerTest.java (from jpmml-sklearn, GNU Affero General Public License v3.0) - 2 votes
/**
 * Checks the encoding of a "mean" imputer over field "x" in three scenarios:
 * <ol>
 *   <li>Imputer alone - the data field exists, there is no "imputer(x)" derived field, the data field has one decorator,
 *   and the feature is still a {@link WildcardFeature} named "x".</li>
 *   <li>Continuous domain followed by the imputer - same field expectations, but there are two decorators,
 *   and the feature is a {@link ContinuousFeature} named "x".</li>
 *   <li>Continuous domain, binarizer and imputer - derived fields "binarizer(x)" and "imputer(binarizer(x))" exist,
 *   the data field has one decorator, and the feature is a {@link ContinuousFeature} named "imputer(binarizer(x))".</li>
 * </ol>
 */
@Test
public void encodeContinuous(){
	FieldName name = FieldName.create("x");
	FieldName imputedName = FieldName.create("imputer(x)");
	FieldName binarizedName = FieldName.create("binarizer(x)");
	FieldName imputedBinarizedName = FieldName.create("imputer(" + binarizedName.getValue() + ")");

	Imputer imputer = new Imputer("sklearn.preprocessing.imputation", "Imputer");
	imputer.put("strategy", "mean");
	imputer.put("missing_values", -999);
	imputer.put("statistics_", 0.5d);

	// Scenario 1: imputer alone
	SkLearnEncoder imputerEncoder = new SkLearnEncoder();

	Feature imputerFeature = encodeFeature(name.getValue(), Arrays.asList(imputer), imputerEncoder);

	assertNotNull(imputerEncoder.getDataField(name));
	assertNull(imputerEncoder.getDerivedField(imputedName));

	List<Decorator> imputerDecorators = imputerEncoder.getDecorators(name);

	assertEquals(1, imputerDecorators.size());

	assertTrue(imputerFeature instanceof WildcardFeature);
	assertEquals(name, imputerFeature.getName());

	// Scenario 2: continuous domain, followed by the imputer
	ContinuousDomain continuousDomain = new ContinuousDomain("sklearn2pmml.decoration", "ContinuousDomain");
	continuousDomain.put("invalid_value_treatment", "return_invalid");
	continuousDomain.put("data_min_", 0d);
	continuousDomain.put("data_max_", 1d);

	SkLearnEncoder domainEncoder = new SkLearnEncoder();

	Feature domainFeature = encodeFeature(name.getValue(), Arrays.asList(continuousDomain, imputer), domainEncoder);

	assertNotNull(domainEncoder.getDataField(name));
	assertNull(domainEncoder.getDerivedField(imputedName));

	List<Decorator> domainDecorators = domainEncoder.getDecorators(name);

	assertEquals(2, domainDecorators.size());

	assertTrue(domainFeature instanceof ContinuousFeature);
	assertEquals(name, domainFeature.getName());

	// Scenario 3: continuous domain, binarizer and imputer
	Binarizer binarizer = new Binarizer("sklearn.preprocessing.data", "Binarizer");
	binarizer.put("threshold", 1d / 3d);

	SkLearnEncoder binarizerEncoder = new SkLearnEncoder();

	Feature binarizerFeature = encodeFeature(name.getValue(), Arrays.asList(continuousDomain, binarizer, imputer), binarizerEncoder);

	assertNotNull(binarizerEncoder.getDataField(name));
	assertNotNull(binarizerEncoder.getDerivedField(binarizedName));
	assertNotNull(binarizerEncoder.getDerivedField(imputedBinarizedName));

	List<Decorator> binarizerDecorators = binarizerEncoder.getDecorators(name);

	assertEquals(1, binarizerDecorators.size());

	assertTrue(binarizerFeature instanceof ContinuousFeature);
	assertEquals(imputedBinarizedName, binarizerFeature.getName());
}