org.jpmml.converter.Feature Java Examples

The following examples show how to use org.jpmml.converter.Feature. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EncoderUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes a feature against zero-based index categories.
 *
 * One index value is generated per element of <code>categories</code>, boxed according to <code>dataType</code>.
 * The work is then handed over to the seven-argument overload, with <code>null</code> as its fourth and fifth argument.
 *
 * @param feature The feature to encode.
 * @param categories The category values. Only the size is inspected here.
 * @param dataType The data type of the generated index values; INTEGER, FLOAT or DOUBLE.
 * @param encoder The encoder that is passed on to the overload.
 *
 * @return The result of the seven-argument overload.
 *
 * @throws IllegalArgumentException If there is at least one category, and the data type is not INTEGER, FLOAT or DOUBLE.
 */
static
public Feature encodeIndexFeature(Feature feature, List<?> categories, DataType dataType, SkLearnEncoder encoder){
	int count = categories.size();

	List<Number> indexCategories = new ArrayList<>(count);

	for(int index = 0; index < count; index++){
		Number indexCategory;

		switch(dataType){
			case INTEGER:
				indexCategory = Integer.valueOf(index);
				break;
			case FLOAT:
				indexCategory = Float.valueOf((float)index);
				break;
			case DOUBLE:
				indexCategory = Double.valueOf((double)index);
				break;
			default:
				throw new IllegalArgumentException();
		}

		indexCategories.add(indexCategory);
	}

	return encodeIndexFeature(feature, categories, indexCategories, null, null, dataType, encoder);
}
 
Example #2
Source File: TreeUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Derives a schema whose features are suitable for a tree model.
 *
 * Binary features are kept as they are.
 * Every other feature is replaced with the result of {@link Feature#toContinuousFeature(DataType)}.
 *
 * @param dataType The data type that non-binary features are converted to.
 * @param schema The schema to transform.
 *
 * @return The transformed schema.
 */
static
private Schema toTreeModelSchema(DataType dataType, Schema schema){
	Function<Feature, Feature> function = (feature) -> {

		if(!(feature instanceof BinaryFeature)){
			return feature.toContinuousFeature(dataType);
		}

		return feature;
	};

	return schema.toTransformedSchema(function);
}
 
Example #3
Source File: EarthConverter.java    From jpmml-r with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates a hinge function as a <code>MAX</code> function application.
 *
 * For direction <code>-1</code> the first argument is <code>SUBTRACT(cut, feature)</code>,
 * for direction <code>1</code> it is <code>SUBTRACT(feature, cut)</code>.
 * The second argument is always the constant <code>0d</code>.
 *
 * @param dir The direction; either <code>-1</code> or <code>1</code>.
 * @param feature The feature that is referenced.
 * @param cut The cut point.
 *
 * @throws IllegalArgumentException If the direction is neither <code>-1</code> nor <code>1</code>.
 */
static
private Apply createHingeFunction(int dir, Feature feature, double cut){
	Expression difference;

	if(dir == -1){
		difference = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, PMMLUtil.createConstant(cut), feature.ref());
	} else

	if(dir == 1){
		difference = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, feature.ref(), PMMLUtil.createConstant(cut));
	} else

	{
		throw new IllegalArgumentException();
	}

	return PMMLUtil.createApply(PMMLFunctions.MAX, difference, PMMLUtil.createConstant(0d));
}
 
Example #4
Source File: RegexTokenizerConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the regex tokenizer as a single document feature.
 *
 * Only the splitter mode with a minimum token length of one is accepted.
 * If lowercasing is requested, then the document feature is based on a derived string field
 * that applies the <code>LOWERCASE</code> function to the input feature.
 *
 * @param encoder The encoder that provides the input feature and hosts the derived field.
 *
 * @return A singleton list holding the document feature.
 *
 * @throws IllegalArgumentException If the transformer is in token matching mode, or if its minimum token length is not one.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	RegexTokenizer regexTokenizer = getTransformer();

	if(!regexTokenizer.getGaps()){
		throw new IllegalArgumentException("Expected splitter mode, got token matching mode");
	}

	if(regexTokenizer.getMinTokenLength() != 1){
		throw new IllegalArgumentException("Expected 1 as minimum token length, got " + regexTokenizer.getMinTokenLength() + " as minimum token length");
	}

	Feature inputFeature = encoder.getOnlyFeature(regexTokenizer.getInputCol());

	// Defaults to the field of the input feature
	Field<?> documentField = inputFeature.getField();

	if(regexTokenizer.getToLowercase()){
		Apply lowercaseApply = PMMLUtil.createApply(PMMLFunctions.LOWERCASE, inputFeature.ref());

		documentField = encoder.createDerivedField(FeatureUtil.createName("lowercase", inputFeature), OpType.CATEGORICAL, DataType.STRING, lowercaseApply);
	}

	DocumentFeature documentFeature = new DocumentFeature(encoder, documentField, regexTokenizer.getPattern());

	return Collections.singletonList(documentFeature);
}
 
Example #5
Source File: CategoricalImputer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes the imputation of a single feature.
 *
 * The string <code>"NaN"</code> as the missing values marker is passed on as <code>null</code>.
 *
 * @param features The input features; exactly one is expected, as checked by {@link ClassDictUtil#checkSize}.
 * @param encoder The encoder that is passed on to {@link ImputerUtil#encodeFeature}.
 *
 * @return A singleton list holding the imputed feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Object fillValue = getFill();
	Object missingValue = getMissingValues();

	ClassDictUtil.checkSize(1, features);

	Object effectiveMissingValue = ("NaN").equals(missingValue) ? null : missingValue;

	Feature imputedFeature = ImputerUtil.encodeFeature(features.get(0), false, effectiveMissingValue, fillValue, MissingValueTreatmentMethod.AS_MODE, encoder);

	return Collections.singletonList(imputedFeature);
}
 
Example #6
Source File: ImputerUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates a boolean "missing_indicator" feature for a feature.
 *
 * If a missing value is given, then the indicator applies the <code>EQUAL</code> function to
 * the feature reference and a constant of the feature's data type.
 * Otherwise, the indicator applies the <code>ISMISSING</code> function to the feature reference.
 *
 * @param feature The feature to test.
 * @param missingValue The value that denotes missingness. May be <code>null</code>.
 * @param encoder The encoder that hosts the derived field.
 *
 * @return A boolean feature backed by the derived field.
 */
static
public Feature encodeIndicatorFeature(Feature feature, Object missingValue, SkLearnEncoder encoder){
	Expression fieldRef = feature.ref();

	Expression indicator;

	if(missingValue == null){
		indicator = PMMLUtil.createApply(PMMLFunctions.ISMISSING, fieldRef);
	} else

	{
		indicator = PMMLUtil.createApply(PMMLFunctions.EQUAL, fieldRef, PMMLUtil.createConstant(missingValue, feature.getDataType()));
	}

	DerivedField indicatorField = encoder.createDerivedField(FeatureUtil.createName("missing_indicator", feature), OpType.CATEGORICAL, DataType.BOOLEAN, indicator);

	return new BooleanFeature(encoder, indicatorField);
}
 
Example #7
Source File: PowerFunctionTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Wraps the input features into power features.
 *
 * Binary features are passed through unchanged.
 * Every other feature is converted to a continuous feature and wrapped into a {@link PowerFeature}.
 *
 * @param features The input features.
 * @param encoder The encoder that the power features are bound to.
 *
 * @return A new list with one output feature per input feature, in input order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Integer power = getPower();

	List<Feature> poweredFeatures = new ArrayList<>();

	for(Feature feature : features){

		if(feature instanceof BinaryFeature){
			poweredFeatures.add(feature);

			continue;
		}

		poweredFeatures.add(new PowerFeature(encoder, feature.toContinuousFeature(), power));
	}

	return poweredFeatures;
}
 
Example #8
Source File: FormulaUtil.java    From jpmml-r with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Resolves the named features of a formula, and adds them to the encoder in the order of the names.
 *
 * @param formula The formula that resolves names to features.
 * @param names The names to resolve.
 * @param allowInteractions If <code>true</code>, then names are resolved via the string-based overload of <code>resolveFeature</code>;
 * otherwise via the {@link FieldName}-based overload.
 * @param encoder The encoder that receives the features.
 */
static
public void addFeatures(Formula formula, List<String> names, boolean allowInteractions, RExpEncoder encoder){

	for(String name : names){
		Feature feature = (allowInteractions ? formula.resolveFeature(name) : formula.resolveFeature(FieldName.create(name)));

		encoder.addFeature(feature);
	}
}
 
Example #9
Source File: MissingIndicator.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates one indicator feature per selected input feature.
 *
 * A <code>Double</code> NaN as the missing values marker is passed on as <code>null</code>.
 *
 * @param features The input features, addressed by the configured feature indices.
 * @param encoder The encoder that is passed on to {@link ImputerUtil#encodeIndicatorFeature}.
 *
 * @return The indicator features, in the order of the feature indices.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<Integer> featureIndices = getFeatureIndices();
	Object missingValues = getMissingValues();

	Object missingValue = ((Double.valueOf(Double.NaN)).equals(missingValues) ? null : missingValues);

	List<Feature> indicatorFeatures = new ArrayList<>();

	for(Integer featureIndex : featureIndices){
		indicatorFeatures.add(ImputerUtil.encodeIndicatorFeature(features.get(featureIndex), missingValue, encoder));
	}

	return indicatorFeatures;
}
 
Example #10
Source File: Composite.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Feeds the features through all transformers, one after another.
 *
 * The output of a transformer becomes the input of the next one.
 * If a transformer reports a non-negative number of features, then the size of its input is checked against it first.
 * Without any transformers, the input features are returned as they are.
 *
 * @param features The initial features.
 * @param encoder The encoder that is passed on to every transformer.
 *
 * @return The features after the last transformer.
 *
 * @see Transformer
 */
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){

	if(hasTransformers()){
		List<? extends Transformer> transformers = getTransformers();

		for(Transformer transformer : transformers){
			int numberOfFeatures = TransformerUtil.getNumberOfFeatures(transformer);

			if(numberOfFeatures >= 0){
				ClassDictUtil.checkSize(numberOfFeatures, features);
			}

			features = transformer.updateAndEncodeFeatures(features, encoder);
		}
	}

	return features;
}
 
Example #11
Source File: ReplaceTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes a pattern replacement over a single string feature.
 *
 * The derived field applies the <code>REPLACE</code> function to the feature reference,
 * the pattern constant and the replacement constant, in that order.
 *
 * @param features The input features; exactly one is expected, as checked by {@link ClassDictUtil#checkSize}.
 * @param encoder The encoder that hosts the derived field.
 *
 * @return A singleton list holding the string feature.
 *
 * @throws IllegalArgumentException If the data type of the feature is not string.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	String pattern = getPattern();
	String replacement = getReplacement();

	ClassDictUtil.checkSize(1, features);

	Feature stringFeature = features.get(0);

	boolean isString = (DataType.STRING).equals(stringFeature.getDataType());
	if(!isString){
		throw new IllegalArgumentException();
	}

	Apply replaceApply = PMMLUtil.createApply(PMMLFunctions.REPLACE)
		.addExpressions(stringFeature.ref(), PMMLUtil.createConstant(pattern, DataType.STRING), PMMLUtil.createConstant(replacement, DataType.STRING));

	DerivedField replacedField = encoder.createDerivedField(FeatureUtil.createName("replace", stringFeature), OpType.CATEGORICAL, DataType.STRING, replaceApply);

	return Collections.singletonList(new StringFeature(encoder, replacedField));
}
 
Example #12
Source File: MatchesTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Encodes a pattern match test over a single string feature.
 *
 * The derived field applies the <code>MATCHES</code> function to the feature reference and the pattern constant.
 *
 * @param features The input features; exactly one is expected, as checked by {@link ClassDictUtil#checkSize}.
 * @param encoder The encoder that hosts the derived field.
 *
 * @return A singleton list holding the boolean feature.
 *
 * @throws IllegalArgumentException If the data type of the feature is not string.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	String pattern = getPattern();

	ClassDictUtil.checkSize(1, features);

	Feature stringFeature = features.get(0);

	boolean isString = (DataType.STRING).equals(stringFeature.getDataType());
	if(!isString){
		throw new IllegalArgumentException();
	}

	Apply matchesApply = PMMLUtil.createApply(PMMLFunctions.MATCHES)
		.addExpressions(stringFeature.ref(), PMMLUtil.createConstant(pattern, DataType.STRING));

	DerivedField matchesField = encoder.createDerivedField(FeatureUtil.createName("matches", stringFeature), OpType.CATEGORICAL, DataType.BOOLEAN, matchesApply);

	return Collections.singletonList(new BooleanFeature(encoder, matchesField));
}
 
Example #13
Source File: FeatureList.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a feature list together with the names of its elements.
 *
 * @param features The features; passed on to the superclass constructor.
 * @param names The names; must be non-<code>null</code> and of the same size as the features.
 *
 * @throws IllegalArgumentException If the names are missing, or if their number differs from the number of features.
 */
public FeatureList(List<? extends Feature> features, List<String> names){
	super(features);

	if(names == null){
		throw new IllegalArgumentException();
	} // End if

	if(names.size() != features.size()){
		throw new IllegalArgumentException();
	}

	setNames(names);
}
 
Example #14
Source File: StackingUtil.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Determines the one encoder that all features share.
 *
 * The distinct encoders of the features are collected into a set,
 * which is then handed to {@link Iterables#getOnlyElement(Iterable)}.
 *
 * @param features The features to inspect.
 *
 * @return The only element of the set of encoders.
 */
static
public PMMLEncoder getEncoder(List<? extends Feature> features){
	Set<PMMLEncoder> distinctEncoders = features.stream()
		.map(Feature::getEncoder)
		.collect(Collectors.toSet());

	PMMLEncoder encoder = Iterables.getOnlyElement(distinctEncoders);

	return encoder;
}
 
Example #15
Source File: Initializer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Initializes the features of a pipeline.
 *
 * @param features The input features; must not contain any elements.
 * @param encoder The encoder that is passed on to <code>initializeFeatures</code>.
 *
 * @return The result of <code>initializeFeatures</code>.
 *
 * @throws IllegalArgumentException If there are input features.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){

	if(features.isEmpty()){
		return initializeFeatures(encoder);
	}

	throw new IllegalArgumentException("Transformer \'" + getClassName() + "\' must be the first step of the pipeline");
}
 
Example #16
Source File: ExpressionTransformer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the expression as a derived field named <code>eval(&lt;expr&gt;)</code>.
 *
 * The expression is translated against a data frame scope named <code>X</code> over the input features.
 * The data type is taken from the <code>dtype</code> attribute when it is set.
 * Otherwise it is string if {@link ExpressionTranslator#isString} says so, and double in all other cases.
 *
 * @param features The input features that the expression may refer to.
 * @param encoder The encoder that hosts the derived field.
 *
 * @return A singleton list holding a continuous feature backed by the derived field.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Object dtype = getDType();
	String expr = getExpr();

	Scope scope = new DataFrameScope(FieldName.create("X"), features);

	Expression expression = ExpressionTranslator.translate(expr, scope);

	DataType dataType;

	if(dtype != null){
		dataType = TransformerUtil.getDataType(dtype);
	} else

	if(ExpressionTranslator.isString(expression, scope)){
		dataType = DataType.STRING;
	} else

	{
		dataType = DataType.DOUBLE;
	}

	DerivedField evalField = encoder.createDerivedField(FieldName.create("eval(" + expr + ")"), TransformerUtil.getOpType(dataType), dataType, expression);

	return Collections.singletonList(new ContinuousFeature(encoder, evalField));
}
 
Example #17
Source File: ChiSqSelectorModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Selects the features of the features column by the selected feature indices, in ascending index order.
 *
 * A non-empty index array is sorted on a copy, so the array returned by the transformer is not modified.
 *
 * @param encoder The encoder that provides the features of the features column.
 *
 * @return The selected features.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	ChiSqSelectorModel selectorModel = getTransformer();

	int[] selectedIndices = selectorModel.selectedFeatures();

	int[] sortedIndices = selectedIndices;

	if(selectedIndices.length > 0){
		sortedIndices = selectedIndices.clone();

		Arrays.sort(sortedIndices);
	}

	return encoder.getFeatures(selectorModel.getFeaturesCol(), sortedIndices);
}
 
Example #18
Source File: SkLearnEncoder.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Renames a feature together with its backing derived field.
 *
 * The feature's name is rewritten reflectively via the declared <code>name</code> field of {@link Feature}.
 * The derived field is removed from the encoder, renamed, and added back under the new name.
 *
 * The reflective field is looked up and made accessible before the encoder is touched.
 * If the reflective write fails, the derived field is added back under its old name before the failure is reported,
 * so that a failed rename does not lose the derived field.
 *
 * @param feature The feature to rename.
 * @param renamedName The new name.
 *
 * @throws IllegalArgumentException If the feature is backed by a {@link DataField}.
 * @throws RuntimeException If the reflective lookup or write fails; the cause is the original {@link ReflectiveOperationException}.
 */
public void renameFeature(Feature feature, FieldName renamedName){
	FieldName name = feature.getName();

	org.dmg.pmml.Field<?> pmmlField = getField(name);

	if(pmmlField instanceof DataField){
		throw new IllegalArgumentException("User input field " + name.getValue() + " cannot be renamed");
	}

	Field nameField;

	try {
		nameField = Feature.class.getDeclaredField("name");
	} catch(ReflectiveOperationException roe){
		throw new RuntimeException(roe);
	}

	// Unconditional, because AccessibleObject#isAccessible() is deprecated, and the call is idempotent
	nameField.setAccessible(true);

	DerivedField derivedField = removeDerivedField(name);

	try {
		nameField.set(feature, renamedName);
	} catch(ReflectiveOperationException roe){
		// Undo the removal, the feature still goes by its old name
		addDerivedField(derivedField);

		throw new RuntimeException(roe);
	}

	derivedField.setName(renamedName);

	addDerivedField(derivedField);
}
 
Example #19
Source File: Formula.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Registers a feature under a name.
 *
 * If {@link RExpUtil#makeName} yields a name that differs from the given one,
 * then the mapping from that name to the given name is recorded as well.
 *
 * @param name The name of the feature.
 * @param feature The feature.
 */
private void putFeature(FieldName name, Feature feature){
	FieldName validName = RExpUtil.makeName(name);

	boolean renamed = !(name).equals(validName);
	if(renamed){
		this.validNames.put(validName, name);
	}

	this.features.put(name, feature);
}
 
Example #20
Source File: OneHotEncoderModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates one binary feature per category value.
 *
 * @param encoder The encoder that the binary features are bound to.
 * @param feature The feature that the binary features are based on.
 * @param values The category values.
 * @param dropLast If <code>true</code>, then no binary feature is created for the last category value.
 *
 * @return The binary features, in the order of the category values. Empty if there are no category values.
 */
static
public List<BinaryFeature> encodeFeature(PMMLEncoder encoder, Feature feature, List<?> values, boolean dropLast){
	int count = values.size();

	// An empty list has no last element to drop; subList(0, -1) would fail with an IndexOutOfBoundsException
	if(dropLast && count > 0){
		count--;
	}

	List<BinaryFeature> result = new ArrayList<>(count);

	for(Object value : values.subList(0, count)){
		result.add(new BinaryFeature(encoder, feature, value));
	}

	return result;
}
 
Example #21
Source File: IndexToStringConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the transformer as a categorical string data field over its labels.
 *
 * @param encoder The encoder that hosts the data field.
 *
 * @return A singleton list holding a categorical feature backed by the data field.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString indexToString = getTransformer();

	DataField labelField = encoder.createDataField(
		formatName(indexToString),
		OpType.CATEGORICAL, DataType.STRING,
		Arrays.asList(indexToString.getLabels())
	);

	Feature labelFeature = new CategoricalFeature(encoder, labelField);

	return Collections.singletonList(labelFeature);
}
 
Example #22
Source File: StringNormalizer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes string normalization as one derived "normalize" field per input feature.
 *
 * The expression starts out as the feature reference.
 * It is wrapped into the translated function when a function is set,
 * and then into the <code>TRIMBLANKS</code> function when blank trimming is enabled.
 * If neither applies, the input features are returned as they are.
 *
 * @param features The input features.
 * @param encoder The encoder that hosts the derived fields.
 *
 * @return One string feature per input feature, in input order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	String function = getFunction();
	Boolean trimBlanks = getTrimBlanks();

	// Nothing to do
	if(function == null && !trimBlanks){
		return features;
	}

	List<Feature> normalizedFeatures = new ArrayList<>();

	for(Feature feature : features){
		Expression normalized = feature.ref();

		if(function != null){
			normalized = PMMLUtil.createApply(translateFunction(function), normalized);
		}

		if(trimBlanks){
			normalized = PMMLUtil.createApply(PMMLFunctions.TRIMBLANKS, normalized);
		}

		Field<?> inputField = encoder.toCategorical(feature.getName(), Collections.emptyList());

		// XXX: The string data type should have been set by the previous transformer already
		inputField.setDataType(DataType.STRING);

		DerivedField normalizedField = encoder.createDerivedField(FeatureUtil.createName("normalize", feature), OpType.CATEGORICAL, DataType.STRING, normalized);

		normalizedFeatures.add(new StringFeature(encoder, normalizedField));
	}

	return normalizedFeatures;
}
 
Example #23
Source File: VectorAssemblerConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Concatenates the features of all input columns, in the order of the input columns.
 *
 * @param encoder The encoder that provides the features of every input column.
 *
 * @return A new list holding the concatenated features.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	VectorAssembler vectorAssembler = getTransformer();

	List<Feature> assembledFeatures = new ArrayList<>();

	for(String inputCol : vectorAssembler.getInputCols()){
		assembledFeatures.addAll(encoder.getFeatures(inputCol));
	}

	return assembledFeatures;
}
 
Example #24
Source File: Formula.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Looks up a feature by name, and insists on finding one.
 *
 * @param name The name of the feature.
 *
 * @return The feature; never <code>null</code>.
 *
 * @throws IllegalArgumentException If there is no feature by that name. The message is the value of the name.
 */
public Feature resolveFeature(FieldName name){
	Feature feature = getFeature(name);

	if(feature != null){
		return feature;
	}

	throw new IllegalArgumentException(name.getValue());
}
 
Example #25
Source File: TfidfVectorizer.java    From jpmml-sklearn with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extends the function application of the superclass with the weight of the term.
 *
 * The weight at the given index is appended as a constant, but only if the transformer uses IDF.
 *
 * @param function Passed on to the superclass.
 * @param feature Passed on to the superclass.
 * @param index The index of the term; also used for looking up the weight.
 * @param term Passed on to the superclass.
 *
 * @return The function application of the superclass, possibly with one more argument.
 */
@Override
public Apply encodeApply(String function, Feature feature, int index, String term){
	TfidfTransformer tfidfTransformer = getTransformer();

	Apply termApply = super.encodeApply(function, feature, index, term);

	Boolean useIdf = tfidfTransformer.getUseIdf();
	if(!useIdf){
		return termApply;
	}

	Number weight = tfidfTransformer.getWeight(index);

	termApply.addExpressions(PMMLUtil.createConstant(weight));

	return termApply;
}
 
Example #26
Source File: MVRConverter.java    From jpmml-r with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Encodes the model object as a generalized linear regression model with the identity link function.
 *
 * The feature coefficients are one column of the <code>coefficients</code> element,
 * selected via the <code>ncomp</code> element (column index <code>ncomp - 1</code>).
 * The intercept is the first <code>Ymeans</code> value minus the sum of products
 * between the feature coefficients and the corresponding <code>Xmeans</code> values.
 *
 * @param schema The schema that provides the label and the features.
 *
 * @return The regression model, with its regression table encoded by {@link GeneralRegressionModelUtil#encodeRegressionTable}.
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	RGenericVector mvr = getObject();

	RDoubleVector coefficients = mvr.getDoubleElement("coefficients");
	RDoubleVector xMeans = mvr.getDoubleElement("Xmeans");
	RDoubleVector yMeans = mvr.getDoubleElement("Ymeans");
	RNumberVector<?> ncomp = mvr.getNumericElement("ncomp");

	RStringVector rowNames = coefficients.dimnames(0);
	RStringVector columnNames = coefficients.dimnames(1);
	RStringVector compNames = coefficients.dimnames(2);

	int rows = rowNames.size();
	int columns = columnNames.size();
	int components = compNames.size();

	List<? extends Feature> features = schema.getFeatures();

	List<Double> featureCoefficients = FortranMatrixUtil.getColumn(coefficients.getValues(), rows, (columns * components), 0 + (ValueUtil.asInt(ncomp.asScalar()) - 1));

	// Accumulate on a primitive, so that the running value is not unboxed and re-boxed on every iteration
	double interceptValue = yMeans.getValue(0);

	for(int j = 0; j < rows; j++){
		interceptValue -= (featureCoefficients.get(j) * xMeans.getValue(j));
	}

	Double intercept = interceptValue;

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.IDENTITY);

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, null);

	return generalRegressionModel;
}
 
Example #27
Source File: TermFeature.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a term feature.
 *
 * The name of the feature is the name of the function, followed by the value in parentheses.
 * The data type of the feature is the data type of the function.
 *
 * @param encoder The encoder; passed on to the superclass constructor.
 * @param defineFunction The function that provides the name prefix and the data type.
 * @param feature The feature that is stored alongside the function.
 * @param value The value that is stored alongside the function.
 */
public TermFeature(PMMLEncoder encoder, DefineFunction defineFunction, Feature feature, String value){
	super(encoder, FieldName.create(defineFunction.getName() + "(" + value + ")"), defineFunction.getDataType());

	setDefineFunction(defineFunction);

	setFeature(feature);
	setValue(value);
}
 
Example #28
Source File: TermFeature.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates the application of the stored function.
 *
 * The function is addressed by name. Its arguments are the reference of the stored feature,
 * and the stored value as a string constant.
 *
 * @return The function application.
 */
public Apply createApply(){
	DefineFunction function = getDefineFunction();
	Feature argumentFeature = getFeature();
	String term = getValue();

	Constant termConstant = PMMLUtil.createConstant(term, DataType.STRING);

	Apply apply = PMMLUtil.createApply(function.getName(), argumentFeature.ref(), termConstant);

	return apply;
}
 
Example #29
Source File: RegressionTree.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes one side of a node split as a simple predicate over the name of the feature.
 *
 * For a binary feature, the predicate compares against the value of the binary feature,
 * using <code>NOT_EQUAL</code> for the left side and <code>EQUAL</code> for the right side.
 * For any other feature, the predicate compares against the formatted threshold of the node,
 * using <code>LESS_OR_EQUAL</code> for the left side and <code>GREATER_THAN</code> for the right side.
 * With the integer data type, the threshold is first replaced with <code>(int)(threshold + 1f)</code>.
 *
 * @param feature The feature of the split.
 * @param node The node that provides the threshold.
 * @param left <code>true</code> for the left side of the split, <code>false</code> for the right side.
 *
 * @return The simple predicate.
 *
 * @throws IllegalArgumentException If the feature is not binary, and its continuous data type is neither integer nor float.
 */
static
private Predicate encodePredicate(Feature feature, Node node, boolean left){
    FieldName name = feature.getName();

    SimplePredicate.Operator operator;
    String value;

    if(!(feature instanceof BinaryFeature)){
        ContinuousFeature continuousFeature = feature.toContinuousFeature();

        Number threshold = node.getThreshold();

        switch(continuousFeature.getDataType()){
            case INTEGER:
                threshold = (int)(threshold.floatValue() + 1f);
                break;
            case FLOAT:
                // Used as is
                break;
            default:
                throw new IllegalArgumentException();
        }

        if(left){
            operator = SimplePredicate.Operator.LESS_OR_EQUAL;
        } else

        {
            operator = SimplePredicate.Operator.GREATER_THAN;
        }

        value = ValueUtil.formatValue(threshold);
    } else

    {
        BinaryFeature binaryFeature = (BinaryFeature)feature;

        if(left){
            operator = SimplePredicate.Operator.NOT_EQUAL;
        } else

        {
            operator = SimplePredicate.Operator.EQUAL;
        }

        value = binaryFeature.getValue();
    }

    return new SimplePredicate(name, operator)
            .setValue(value);
}
 
Example #30
Source File: PassThrough.java    From jpmml-sklearn with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Returns the input features as they are; the encoder is not used.
 *
 * @param features The input features.
 * @param encoder The encoder; ignored.
 *
 * @return The very same list that was passed in.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	return features;
}