org.apache.spark.ml.feature.MinMaxScaler Java Examples

The following examples show how to use org.apache.spark.ml.feature.MinMaxScaler. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JavaMinMaxScalerExample.java    From SparkDemo with MIT License 5 votes vote down vote up
/**
 * Fits a {@code MinMaxScaler} on a small in-memory DataFrame and prints the
 * original feature vectors next to their rescaled counterparts.
 *
 * @param args command-line arguments; not read by this example
 */
public static void main(String[] args) {
  SparkSession spark = SparkSession.builder().appName("JavaMinMaxScalerExample").getOrCreate();

  // $example on$
  // Two-column schema: an integer id plus a non-nullable vector of features.
  StructField idField = new StructField("id", DataTypes.IntegerType, false, Metadata.empty());
  StructField featuresField = new StructField("features", new VectorUDT(), false, Metadata.empty());
  StructType schema = new StructType(new StructField[]{idField, featuresField});

  List<Row> data = Arrays.asList(
      RowFactory.create(0, Vectors.dense(1.0, 0.1, -1.0)),
      RowFactory.create(1, Vectors.dense(2.0, 1.1, 1.0)),
      RowFactory.create(2, Vectors.dense(3.0, 10.1, 3.0))
  );
  Dataset<Row> dataFrame = spark.createDataFrame(data, schema);

  MinMaxScaler scaler = new MinMaxScaler();
  scaler.setInputCol("features");
  scaler.setOutputCol("scaledFeatures");

  // Fitting gathers the summary statistics and yields the MinMaxScalerModel.
  MinMaxScalerModel scalerModel = scaler.fit(dataFrame);

  // Apply the fitted model so each feature is rescaled to the range [min, max].
  Dataset<Row> scaledData = scalerModel.transform(dataFrame);

  double lowerBound = scaler.getMin();
  double upperBound = scaler.getMax();
  System.out.println("Features scaled to range: [" + lowerBound + ", " + upperBound + "]");

  scaledData.select("features", "scaledFeatures").show();
  // $example off$

  spark.stop();
}
 
Example #2
Source File: DataPreview.java    From StockPrediction with MIT License 5 votes vote down vote up
/**
 * Loads the price CSV from the classpath, prints a preview and a per-symbol row count,
 * assembles the numeric columns into a feature vector and min-max scales it to [0, 1].
 *
 * @param args command-line arguments; not read by this program
 * @throws IOException propagated from resolving the classpath CSV resource to a file
 */
public static void main (String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local").appName("DataProcess").getOrCreate();
    try {
        String filename = "prices-split-adjusted.csv";
        // Only referenced by the disabled per-symbol filter below.
        String symbol = "GOOG";
        // load data from csv file
        // Each numeric column is cast to double under a temporary name and the original is
        // dropped, which moves it to the end; toDF then restores the original names positionally.
        Dataset<Row> data = spark.read().format("csv").option("header", true)
                .load(new ClassPathResource(filename).getFile().getAbsolutePath())
                //.filter(functions.col("symbol").equalTo(symbol))
                //.drop("date").drop("symbol")
                .withColumn("openPrice", functions.col("open").cast("double")).drop("open")
                .withColumn("closePrice", functions.col("close").cast("double")).drop("close")
                .withColumn("lowPrice", functions.col("low").cast("double")).drop("low")
                .withColumn("highPrice", functions.col("high").cast("double")).drop("high")
                .withColumn("volumeTmp", functions.col("volume").cast("double")).drop("volume")
                .toDF("date", "symbol", "open", "close", "low", "high", "volume");

        data.show();

        // Count the rows recorded for every symbol.
        Dataset<Row> symbols = data.select("date", "symbol").groupBy("symbol").agg(functions.count("date").as("count"));
        System.out.println("Number of Symbols: " + symbols.count());
        symbols.show();

        VectorAssembler assembler = new VectorAssembler()
                .setInputCols(new String[] {"open", "low", "high", "volume", "close"})
                .setOutputCol("features");

        data = assembler.transform(data).drop("open", "low", "high", "volume", "close");

        // The frame still carries "date" and "symbol" here, so rename only the scaled column.
        // toDF("features") would fail because it needs exactly one name per existing column.
        data = new MinMaxScaler().setMin(0).setMax(1)
                .setInputCol("features").setOutputCol("normalizedFeatures")
                .fit(data).transform(data)
                .drop("features")
                .withColumnRenamed("normalizedFeatures", "features");
    } finally {
        // Release the local Spark session even when loading or fitting fails.
        spark.stop();
    }
}
 
Example #3
Source File: MinMaxScalerBridgeTest.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Fits a MinMaxScaler with output range [-5, 5] in Spark, exports the model, re-imports it
 * as a standalone transformer, and hands both to {@code assertCorrectness} for comparison.
 */
@Test
public void testMinMaxScaler() {
    // Schema: a double label followed by the feature vector, both non-nullable.
    StructField labelField = new StructField("label", DataTypes.DoubleType, false, Metadata.empty());
    StructField featuresField = new StructField("features", new VectorUDT(), false, Metadata.empty());
    StructType rowSchema = new StructType(new StructField[]{labelField, featuresField});

    // One row per fixture vector, labelled 1.0 through 4.0.
    List<Row> rows = Arrays.asList(
            RowFactory.create(1.0, Vectors.dense(data[0])),
            RowFactory.create(2.0, Vectors.dense(data[1])),
            RowFactory.create(3.0, Vectors.dense(data[2])),
            RowFactory.create(4.0, Vectors.dense(data[3]))
    );
    JavaRDD<Row> rowRdd = jsc.parallelize(rows);
    Dataset<Row> input = spark.createDataFrame(rowRdd, rowSchema);

    // Train the scaler in Spark.
    MinMaxScaler scaler = new MinMaxScaler()
            .setInputCol("features")
            .setOutputCol("scaled")
            .setMin(-5)
            .setMax(5);
    MinMaxScalerModel sparkModel = scaler.fit(input);

    // Round-trip the model through export/import to obtain the transformer under test.
    byte[] serializedModel = ModelExporter.export(sparkModel);
    final Transformer transformer = ModelImporter.importAndGetTransformer(serializedModel);

    // Collect Spark's own output, ordered by label, and compare.
    Dataset<Row> scaledByLabel = sparkModel.transform(input).orderBy("label");
    List<Row> sparkOutput = scaledByLabel.select("features", "scaled").collectAsList();
    assertCorrectness(sparkOutput, expected, transformer);
}
 
Example #4
Source File: MinMaxScalerBridgeTest.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
/**
 * Fits a MinMaxScaler with output range [-5, 5] on four labeled points, exports the model,
 * re-imports it as a standalone transformer, and hands both to {@code assertCorrectness}.
 *
 * NOTE(review): despite the method name, the body exercises MinMaxScaler, not a
 * standard scaler; the name is kept so existing test references stay valid.
 */
@Test
public void testStandardScaler() {
    //prepare data
    // Labels are distinct (1.0 through 4.0) so that orderBy("label") below has a single
    // well-defined row order; the scaler itself never reads the label column.
    List<LabeledPoint> localTraining = Arrays.asList(
            new LabeledPoint(1.0, Vectors.dense(data[0])),
            new LabeledPoint(2.0, Vectors.dense(data[1])),
            new LabeledPoint(3.0, Vectors.dense(data[2])),
            new LabeledPoint(4.0, Vectors.dense(data[3])));
    DataFrame df = sqlContext.createDataFrame(sc.parallelize(localTraining), LabeledPoint.class);

    //train model in spark
    MinMaxScalerModel sparkModel = new MinMaxScaler()
            .setInputCol("features")
            .setOutputCol("scaled")
            .setMin(-5)
            .setMax(5)
            .fit(df);


    //Export model, import it back and get transformer
    byte[] exportedModel = ModelExporter.export(sparkModel, df);
    final Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    //compare predictions
    Row[] sparkOutput = sparkModel.transform(df).orderBy("label").select("features", "scaled").collect();
    assertCorrectness(sparkOutput, expected, transformer);
}