Java Code Examples for org.apache.spark.ml.linalg.Vectors#dense()

The following examples show how to use org.apache.spark.ml.linalg.Vectors#dense(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0 5 votes vote down vote up
private static Vector createVector(MatrixBlock row) {
	final int n = row.getNumColumns();
	// Row with no non-zeros: an empty sparse vector of the right length.
	if (row.isEmptyBlock(false)) {
		return Vectors.sparse(n, new int[0], new double[0]);
	}
	// Sparse storage: pass row 0's index/value arrays straight to Spark.
	if (row.isInSparseFormat()) {
		return Vectors.sparse(n, row.getSparseBlock().indexes(0), row.getSparseBlock().values(0));
	}
	// Dense storage: wrap the contiguous value array.
	return Vectors.dense(row.getDenseBlockValues());
}
 
Example 2
Source File: JavaElementwiseProductExample.java    From SparkDemo with MIT License 5 votes vote down vote up
public static void main(String[] args) {
  // Spark session for this example application.
  SparkSession session = SparkSession
    .builder()
    .appName("JavaElementwiseProductExample")
    .getOrCreate();

  // $example on$
  // Create some vector data; also works for sparse vectors
  List<Row> rows = Arrays.asList(
    RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
    RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
  );

  // Schema: a string id plus a vector column.
  StructType schema = DataTypes.createStructType(Arrays.asList(
    DataTypes.createStructField("id", DataTypes.StringType, false),
    DataTypes.createStructField("vector", new VectorUDT(), false)
  ));

  Dataset<Row> dataFrame = session.createDataFrame(rows, schema);

  // Per-component multiplier applied to every input vector.
  Vector scalingVec = Vectors.dense(0.0, 1.0, 2.0);

  ElementwiseProduct transformer = new ElementwiseProduct()
    .setScalingVec(scalingVec)
    .setInputCol("vector")
    .setOutputCol("transformedVector");

  // Batch transform the vectors to create new column:
  transformer.transform(dataFrame).show();
  // $example off$
  session.stop();
}
 
Example 3
Source File: SimplePredictionFromTextFile.java    From net.jgp.labs.spark with Apache License 2.0 5 votes vote down vote up
private void start() {
  // Local Spark session; a UDF assembles the feature vector from a double column.
  SparkSession spark = SparkSession.builder().appName(
      "Simple prediction from Text File").master("local").getOrCreate();

  spark.udf().register("vectorBuilder", new VectorBuilder(), new VectorUDT());

  // CSV layout: _c0 = feature value, _c1 = label; "features" is filled by the UDF.
  String filename = "data/tuple-data-file.csv";
  StructType schema = new StructType(
      new StructField[] { new StructField("_c0", DataTypes.DoubleType, false,
          Metadata.empty()),
          new StructField("_c1", DataTypes.DoubleType, false, Metadata
              .empty()),
          new StructField("features", new VectorUDT(), true, Metadata
              .empty()), });

  Dataset<Row> df = spark.read().format("csv").schema(schema).option("header",
      "false")
      .load(filename);
  // Rename raw CSV columns to the names LinearRegression expects.
  df = df.withColumn("valuefeatures", df.col("_c0")).drop("_c0");
  df = df.withColumn("label", df.col("_c1")).drop("_c1");
  df.printSchema();

  // Build the ML "features" vector column via the registered UDF.
  df = df.withColumn("features", callUDF("vectorBuilder", df.col(
      "valuefeatures")));
  df.printSchema();
  df.show();

  LinearRegression lr = new LinearRegression().setMaxIter(20);// .setRegParam(1).setElasticNetParam(1);

  // Fit the model to the data.
  LinearRegressionModel model = lr.fit(df);

  // Given a dataset, predict each point's label, and show the results.
  model.transform(df).show();

  // Training diagnostics: iterations, loss history, residuals, fit quality.
  LinearRegressionTrainingSummary trainingSummary = model.summary();
  System.out.println("numIterations: " + trainingSummary.totalIterations());
  System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary
      .objectiveHistory()));
  trainingSummary.residuals().show();
  System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
  System.out.println("r2: " + trainingSummary.r2());

  double intercept = model.intercept();
  System.out.println("Intercept: " + intercept);
  double regParam = model.getRegParam();
  // getRegParam() is the regularization hyperparameter, not a fitted coefficient.
  System.out.println("Regularization parameter: " + regParam);
  double tol = model.getTol();
  System.out.println("Tol: " + tol);
  Double feature = 7.0;
  Vector features = Vectors.dense(feature);
  double p = model.predict(features);

  System.out.println("Prediction for feature " + feature + " is " + p);
  // Manual prediction for feature value 8: slope * x + intercept.
  // (The original used regParam here, which is the regularization strength,
  // not the model's slope.)
  System.out.println(8 * model.coefficients().apply(0) + intercept);
}
 
Example 4
Source File: RDDConverterUtils.java    From systemds with Apache License 2.0 5 votes vote down vote up
private static Vector createVector(MatrixBlock row) {
	// Completely empty row: emit a sparse vector with no entries.
	if (row.isEmptyBlock(false))
		return Vectors.sparse(row.getNumColumns(), new int[0], new double[0]);
	// Sparse row: hand the block's row-0 index and value arrays to Spark.
	if (row.isInSparseFormat())
		return Vectors.sparse(row.getNumColumns(),
				row.getSparseBlock().indexes(0), row.getSparseBlock().values(0));
	// Dense row: build a dense vector from the raw value array.
	return Vectors.dense(row.getDenseBlockValues());
}
 
Example 5
Source File: JavaBucketedRandomProjectionLSHExample.java    From SparkDemo with MIT License 4 votes vote down vote up
public static void main(String[] args) {
  // Spark session for the LSH example.
  SparkSession session = SparkSession
    .builder()
    .appName("JavaBucketedRandomProjectionLSHExample")
    .getOrCreate();

  // $example on$
  // Two small 2-D point sets used for the similarity joins below.
  List<Row> rowsA = Arrays.asList(
    RowFactory.create(0, Vectors.dense(1.0, 1.0)),
    RowFactory.create(1, Vectors.dense(1.0, -1.0)),
    RowFactory.create(2, Vectors.dense(-1.0, -1.0)),
    RowFactory.create(3, Vectors.dense(-1.0, 1.0))
  );

  List<Row> rowsB = Arrays.asList(
      RowFactory.create(4, Vectors.dense(1.0, 0.0)),
      RowFactory.create(5, Vectors.dense(-1.0, 0.0)),
      RowFactory.create(6, Vectors.dense(0.0, 1.0)),
      RowFactory.create(7, Vectors.dense(0.0, -1.0))
  );

  // Shared schema: integer id plus the vector column the LSH model hashes.
  StructType schema = new StructType(new StructField[]{
    new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
    new StructField("keys", new VectorUDT(), false, Metadata.empty())
  });
  Dataset<Row> dfA = session.createDataFrame(rowsA, schema);
  Dataset<Row> dfB = session.createDataFrame(rowsB, schema);

  // Query point for the nearest-neighbor searches.
  Vector queryKey = Vectors.dense(1.0, 0.0);

  BucketedRandomProjectionLSH lsh = new BucketedRandomProjectionLSH()
    .setBucketLength(2.0)
    .setNumHashTables(3)
    .setInputCol("keys")
    .setOutputCol("values");

  BucketedRandomProjectionLSHModel model = lsh.fit(dfA);

  // Feature Transformation
  model.transform(dfA).show();
  // Cache the transformed columns
  Dataset<Row> transformedA = model.transform(dfA).cache();
  Dataset<Row> transformedB = model.transform(dfB).cache();

  // Approximate similarity join
  model.approxSimilarityJoin(dfA, dfB, 1.5).show();
  model.approxSimilarityJoin(transformedA, transformedB, 1.5).show();
  // Self Join
  model.approxSimilarityJoin(dfA, dfA, 2.5).filter("datasetA.id < datasetB.id").show();

  // Approximate nearest neighbor search
  model.approxNearestNeighbors(dfA, queryKey, 2).show();
  model.approxNearestNeighbors(transformedA, queryKey, 2).show();
  // $example off$

  session.stop();
}
 
Example 6
Source File: VectorBuilder.java    From net.jgp.labs.spark with Apache License 2.0 4 votes vote down vote up
@Override
public Vector call(Double value) throws Exception {
  // Wrap the single scalar into a one-element dense ML vector.
  // NOTE(review): a null input would throw NullPointerException on unboxing
  // — confirm callers never pass null.
  return Vectors.dense(value);
}