Example #1
Source File: From SparkDemo, MIT License (5 votes)
public static void main(String[] args) {
  SparkSession spark = SparkSession

  // $example on$
  JavaRDD<Rating> ratingsRDD = spark
    .read().textFile(Constant.LOCAL_FILE_PREX +"data/mllib/als/sample_movielens_ratings.txt").javaRDD()
    .map(new Function<String, Rating>() {
      public Rating call(String str) {
        return Rating.parseRating(str);
  Dataset<Row> ratings = spark.createDataFrame(ratingsRDD, Rating.class);
  Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2});
  Dataset<Row> training = splits[0];
  Dataset<Row> test = splits[1];

  // Build the recommendation model using ALS on the training data
  ALS als = new ALS()
  ALSModel model =;

  // Evaluate the model by computing the RMSE on the test data
  Dataset<Row> predictions = model.transform(test);

  RegressionEvaluator evaluator = new RegressionEvaluator()
  Double rmse = evaluator.evaluate(predictions);
  System.out.println("Root-mean-square error = " + rmse);
  // $example off$
Example #2
Source File: From Spark_ALS, MIT License (4 votes)
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaALSExample").setMaster("local");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(jsc);

    JavaRDD<Rating> ratingsRDD = jsc.textFile("data/sample_movielens_ratings.txt")
            .map(new Function<String, Rating>() {
                public Rating call(String str) {
                    return Rating.parseRating(str);
    Dataset<Row> ratings = sqlContext.createDataFrame(ratingsRDD, Rating.class);
    Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2}); // //对数据进行分割,80%为训练样例,剩下的为测试样例。
    Dataset<Row> training = splits[0];
    Dataset<Row> test = splits[1];

    // Build the recommendation model using ALS on the training data
    ALS als = new ALS().setMaxIter(5) // 设置迭代次数
            .setRegParam(0.01) // //正则化参数,使每次迭代平滑一些,此数据集取0.1好像错误率低一些。
    ALSModel model =; // //调用算法开始训练

    Dataset<Row> itemFactors = model.itemFactors();;
    Dataset<Row> userFactors = model.userFactors();;

    // Evaluate the model by computing the RMSE on the test data
    Dataset<Row> rawPredictions = model.transform(test); //对测试数据进行预测
    Dataset<Row> predictions = rawPredictions
            .withColumn("rating", rawPredictions.col("rating").cast(DataTypes.DoubleType))
            .withColumn("prediction", rawPredictions.col("prediction").cast(DataTypes.DoubleType));

    RegressionEvaluator evaluator = new RegressionEvaluator().setMetricName("rmse").setLabelCol("rating")
    Double rmse = evaluator.evaluate(predictions);"Root-mean-square error = {} ", rmse);
