org.apache.spark.ml.feature.Binarizer Java Examples

The following examples show how to use org.apache.spark.ml.feature.Binarizer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JavaBinarizerExample.java, from the SparkDemo project (MIT License), 6 votes
/**
 * Runs the Binarizer example end to end: builds a three-row DataFrame with an
 * integer {@code id} column and a double {@code feature} column, applies a
 * {@code Binarizer} configured with a threshold of 0.5 to {@code feature},
 * and prints the threshold followed by the transformed DataFrame.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  SparkSession spark = SparkSession.builder().appName("JavaBinarizerExample").getOrCreate();

  // $example on$
  // Column layout of the input: both columns are declared non-nullable.
  StructField[] fields = {
    new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
    new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
  };
  StructType schema = new StructType(fields);

  List<Row> rows = Arrays.asList(
    RowFactory.create(0, 0.1),
    RowFactory.create(1, 0.8),
    RowFactory.create(2, 0.2)
  );
  Dataset<Row> continuousDataFrame = spark.createDataFrame(rows, schema);

  // Reads "feature" and writes the result to a new "binarized_feature" column.
  Binarizer binarizer = new Binarizer()
    .setInputCol("feature")
    .setOutputCol("binarized_feature")
    .setThreshold(0.5);

  Dataset<Row> binarizedDataFrame = binarizer.transform(continuousDataFrame);

  double threshold = binarizer.getThreshold();
  System.out.println("Binarizer output with Threshold = " + threshold);
  binarizedDataFrame.show();
  // $example off$

  spark.stop();
}
 
Example #2
Source File: BinarizerConverter.java, from the jpmml-sparkml project (GNU Affero General Public License v3.0), 4 votes
/**
 * Creates a converter for the given {@code Binarizer}; the transformer is
 * handed straight to the superclass constructor.
 *
 * @param transformer the Spark ML {@code Binarizer} stage to convert
 */
public BinarizerConverter(Binarizer transformer) {
	super(transformer);
}
 
Example #3
Source File: BinarizerConverter.java, from the jpmml-sparkml project (GNU Affero General Public License v3.0), 3 votes
/**
 * Encodes the wrapped {@code Binarizer} as PMML derived fields, one per input column.
 *
 * <p>For every input column, the column's single feature is converted to a continuous
 * feature and wrapped in an {@code if} expression that yields {@code 0} when the value is
 * less than or equal to the transformer's threshold and {@code 1} otherwise. The
 * expression is registered as a categorical, double-typed derived field and exposed as
 * an {@code IndexFeature} over the categories {@code 0} and {@code 1}.
 *
 * <p>The same scalar threshold (from {@code getThreshold()}) is applied to every input
 * column.
 *
 * @param encoder the encoder used to look up input features and to create derived fields
 * @return the encoded features, in the same order as the input columns
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	Binarizer transformer = getTransformer();
	InOutMode inputMode = getInputMode();

	Double threshold = transformer.getThreshold();
	String[] inputCols = inputMode.getInputCols(transformer);

	List<Feature> features = new ArrayList<>();

	for(int index = 0; index < inputCols.length; index++){
		ContinuousFeature input = encoder.getOnlyFeature(inputCols[index]).toContinuousFeature();

		// Condition of the "if": input <= threshold.
		Apply atOrBelowThreshold = PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL, input.ref(), PMMLUtil.createConstant(threshold));

		// if(input <= threshold) then 0 else 1
		Apply binarize = new Apply(PMMLFunctions.IF)
			.addExpressions(atOrBelowThreshold)
			.addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

		DerivedField binarizedField = encoder.createDerivedField(formatName(transformer, index), OpType.CATEGORICAL, DataType.DOUBLE, binarize);

		features.add(new IndexFeature(encoder, binarizedField, Arrays.asList(0d, 1d)));
	}

	return features;
}