org.apache.spark.mllib.recommendation.MatrixFactorizationModel Java Examples
The following examples show how to use
org.apache.spark.mllib.recommendation.MatrixFactorizationModel.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ALSTest.java From DDF with Apache License 2.0 | 6 votes |
/**
 * Trains a collaborative-filtering model on the {@code ratings} table (rank 3, 15 iterations,
 * lambda 10) and prints the rating the raw {@link MatrixFactorizationModel} predicts for
 * user 1 and product 4.
 *
 * <p>The manager is shut down in a {@code finally} block so that it is released even when
 * table creation, training or prediction throws.
 *
 * @throws DDFException if any DDF operation fails
 */
@Ignore
public void TestALS() throws DDFException {
  try {
    createTableRatings();
    DDF ratings = manager.sql2ddf("select userid, movieid, score from ratings", false);

    int rank = 3;
    double lambda = 10;
    int iterNum = 15;

    MatrixFactorizationModel model = (MatrixFactorizationModel)
        ratings.ML.train("collaborativeFiltering", rank, iterNum, lambda).getRawModel();

    double r = model.predict(1, 4);
    System.out.println(">>>RATING: " + r);
  } finally {
    // Previously only reached on the success path, leaking the manager on any failure.
    manager.shutdown();
  }
}
Example #2
Source File: Evaluation.java From oryx with Apache License 2.0 | 6 votes |
/**
 * Computes the root mean squared error between each test {@link Rating#rating()} and the
 * value the model predicts for the same (user, product) pair.
 */
static double rmse(MatrixFactorizationModel mfModel, JavaRDD<Rating> testData) {
  // Actual ratings, keyed by (user, product).
  JavaPairRDD<Tuple2<Integer,Integer>,Double> actualByUserProduct = testData.mapToPair(
      r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));

  // predict() is a Scala API taking RDD[(Int, Int)], which Java sees as Tuple2<Object,Object>;
  // the double cast only re-labels the element type.
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> userProductPairs =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) actualByUserProduct.keys().rdd();
  JavaRDD<Rating> predicted = testData.wrapRDD(mfModel.predict(userProductPairs));

  // Predicted ratings, keyed the same way so they can be joined with the actual ones.
  JavaPairRDD<Tuple2<Integer,Integer>,Double> predictedByUserProduct = predicted.mapToPair(
      r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()));

  double meanSquaredError = predictedByUserProduct
      .join(actualByUserProduct)
      .values()
      .mapToDouble(predictedAndActual -> {
        double error = predictedAndActual._1() - predictedAndActual._2();
        return error * error;
      })
      .mean();
  return Math.sqrt(meanSquaredError);
}
Example #3
Source File: JavaALS.java From SparkDemo with MIT License | 5 votes |
/**
 * Trains an ALS model from a ratings file and writes the resulting user and product feature
 * vectors as text under {@code <output_dir>/userFeatures} and {@code <output_dir>/productFeatures}.
 *
 * <p>Arguments: {@code <ratings_file> <rank> <iterations> <output_dir> [<blocks>]}. When
 * {@code <blocks>} is omitted, {@code -1} is passed to {@code ALS.train}.
 *
 * @param args command-line arguments as described above; the process exits with status 1
 *     if fewer than four are supplied
 */
public static void main(String[] args) {
  if (args.length < 4) {
    System.err.println(
        "Usage: JavaALS <ratings_file> <rank> <iterations> <output_dir> [<blocks>]");
    System.exit(1);
  }

  SparkConf sparkConf = new SparkConf().setAppName("JavaALS");
  int rank = Integer.parseInt(args[1]);
  int iterations = Integer.parseInt(args[2]);
  String outputDir = args[3];
  int blocks = -1;
  // ">= 5" rather than "== 5": extra trailing arguments must not silently drop <blocks>.
  if (args.length >= 5) {
    blocks = Integer.parseInt(args[4]);
  }

  JavaSparkContext sc = new JavaSparkContext(sparkConf);
  try {
    JavaRDD<String> lines = sc.textFile(args[0]);
    JavaRDD<Rating> ratings = lines.map(new ParseRating());

    MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);

    model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
        outputDir + "/userFeatures");
    model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
        outputDir + "/productFeatures");
    System.out.println("Final user/product features written to " + outputDir);
  } finally {
    // Stop the context even when training or saving fails.
    sc.stop();
  }
}
Example #4
Source File: ALSUpdate.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Rebuilds a {@link MatrixFactorizationModel} from the feature files that a PMML document
 * points to through its "X" (user) and "Y" (item) extensions.
 *
 * @param sparkContext context used to read the feature files
 * @param pmml document carrying the "X" and "Y" extension values
 * @param modelParentPath directory the extension values are resolved against
 * @param bUserIDToIndex broadcast user ID-to-index mapping
 * @param bItemIDToIndex broadcast item ID-to-index mapping
 * @return model whose rank is the length of the first user feature vector
 */
private static MatrixFactorizationModel pmmlToMFModel(
    JavaSparkContext sparkContext,
    PMML pmml,
    Path modelParentPath,
    Broadcast<Map<String,Integer>> bUserIDToIndex,
    Broadcast<Map<String,Integer>> bItemIDToIndex) {
  String userFeaturesLocation = AppPMMLUtils.getExtensionValue(pmml, "X");
  String itemFeaturesLocation = AppPMMLUtils.getExtensionValue(pmml, "Y");

  Path userFeaturesPath = new Path(modelParentPath, userFeaturesLocation);
  JavaPairRDD<String,float[]> userFeatures = readFeaturesRDD(sparkContext, userFeaturesPath);
  Path itemFeaturesPath = new Path(modelParentPath, itemFeaturesLocation);
  JavaPairRDD<String,float[]> itemFeatures = readFeaturesRDD(sparkContext, itemFeaturesPath);

  // Rank is not read from the PMML; it is taken from the first user vector.
  float[] firstUserVector = userFeatures.first()._2();
  int rank = firstUserVector.length;

  return new MatrixFactorizationModel(
      rank,
      readAndConvertFeatureRDD(userFeatures, bUserIDToIndex),
      readAndConvertFeatureRDD(itemFeatures, bItemIDToIndex));
}
Example #5
Source File: Evaluation.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Predicts a rating for every given (user, product) pair and groups the predictions by user.
 *
 * @param mfModel model used for prediction
 * @param data used only to wrap the Scala result RDD back into a {@link JavaRDD}
 * @param userProducts (user, product) pairs to score
 * @return predicted ratings grouped by user
 */
private static JavaPairRDD<Integer,Iterable<Rating>> predictAll(
    MatrixFactorizationModel mfModel,
    JavaRDD<Rating> data,
    JavaPairRDD<Integer,Integer> userProducts) {
  // The Scala predict() signature surfaces in Java as RDD<Tuple2<Object,Object>>.
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,Object>> scalaUserProducts =
      (RDD<Tuple2<Object,Object>>) (RDD<?>) userProducts.rdd();

  JavaRDD<Rating> predictions = data.wrapRDD(mfModel.predict(scalaUserProducts));
  return predictions.groupBy(Rating::user);
}
Example #6
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | 5 votes |
/**
 * Trains an ALS model from the rows of the Cassandra ratings table.
 *
 * @param sparkCtx context used to read the ratings table
 * @param cassandraConnector not used by this implementation
 * @return model trained with the {@code RANK}, {@code ITER} and {@code LAMBDA} settings
 */
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
  CassandraJavaRDD<CassandraRow> ratingRows = javaFunctions(sparkCtx)
      .cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);

  // Anonymous class rather than a lambda: this is the pre-Java-8 flavour of the filter.
  org.apache.spark.api.java.function.Function<CassandraRow, Rating> rowToRating =
      new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
        @Override
        public Rating call(CassandraRow row) throws Exception {
          return new Rating(
              row.getInt(RatingDO.USER_COL),
              row.getInt(RatingDO.PRODUCT_COL),
              row.getDouble(RatingDO.RATING_COL));
        }
      };

  JavaRDD<Rating> ratings = ratingRows.map(rowToRating);
  return ALS.train(JavaRDD.toRDD(ratings), RANK, ITER, LAMBDA);
}
Example #7
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | 5 votes |
/**
 * Predicts a rating for the (user, product) pair of every validation row.
 *
 * @param model trained model
 * @param validationsCassRdd validation rows supplying the user and product columns
 * @return predicted ratings
 */
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
  // Anonymous class rather than a lambda: this is the pre-Java-8 flavour of the filter.
  org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>> rowToUserProduct =
      new org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>>() {
        @Override
        public Tuple2<Object, Object> call(CassandraRow row) throws Exception {
          return new Tuple2<Object, Object>(
              row.getInt(RatingDO.USER_COL),
              row.getInt(RatingDO.PRODUCT_COL));
        }
      };

  RDD<Tuple2<Object, Object>> userProducts = JavaRDD.toRDD(validationsCassRdd.map(rowToUserProduct));
  return model.predict(userProducts).toJavaRDD();
}
Example #8
Source File: CollabFilterCassandraDriver.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | 5 votes |
/**
 * Instantiates {@code collabfilter.CollabFilterCassandra<version>} reflectively, trains it,
 * predicts against the validation table, prints the results report and returns the RMSE.
 *
 * @param version numeric suffix selecting the implementation class
 * @return the RMSE reported by the implementation's {@code validate}
 * @throws InstantiationException if the implementation cannot be instantiated
 * @throws IllegalAccessException if its no-arg constructor is not accessible
 * @throws ClassNotFoundException if no class exists for the given version
 */
double trainAndValidate(int version)
    throws InstantiationException, IllegalAccessException, ClassNotFoundException {
  final ICollabFilterCassandra cfc = (ICollabFilterCassandra)
      Class.forName("collabfilter.CollabFilterCassandra" + version).newInstance();

  // The session is not referenced below; it is opened here and closed when the block exits.
  try (Session session = this.cassandraConnector.openSession()) {
    MatrixFactorizationModel model = cfc.train(this.sparkCtx, this.cassandraConnector);

    CassandraJavaRDD<CassandraRow> validationRows = javaFunctions(this.sparkCtx)
        .cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.VALIDATION_TABLE);
    JavaRDD<Rating> predictions = cfc.predict(model, validationRows);

    double rmse = cfc.validate(predictions, validationRows);
    String report = cfc.resultsReport(predictions, validationRows, rmse);
    System.out.println(report);
    return rmse;
  }
}
Example #9
Source File: ALSUpdate.java From oryx with Apache License 2.0 | 4 votes |
/**
 * Trains an ALS model on the given data and returns a PMML document describing it.
 *
 * <p>Hyperparameters are read positionally: 0 = number of features, 1 = lambda, 2 = alpha,
 * and, only when {@code logStrength} is set, 3 = epsilon.
 *
 * @param sparkContext context used for broadcasts
 * @param trainData raw training lines, parsed with {@code MLFunctions.PARSE_FN}
 * @param hyperParameters positional hyperparameter values as described above
 * @param candidatePath location handed to {@code mfModelToPMML}
 * @return PMML produced by {@code mfModelToPMML}
 */
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  int features = (Integer) hyperParameters.get(0);
  double lambda = (Double) hyperParameters.get(1);
  double alpha = (Double) hyperParameters.get(2);
  double epsilon = logStrength ? (Double) hyperParameters.get(3) : Double.NaN;

  Preconditions.checkArgument(features > 0);
  Preconditions.checkArgument(lambda >= 0.0);
  Preconditions.checkArgument(alpha > 0.0);
  // epsilon is only meaningful, and only validated, when logStrength is on.
  Preconditions.checkArgument(!logStrength || epsilon > 0.0);

  JavaRDD<String[]> parsedLines = trainData.map(MLFunctions.PARSE_FN);
  parsedLines.cache();

  Map<String,Integer> userIndexByID = buildIDIndexMapping(parsedLines, true);
  Map<String,Integer> itemIndexByID = buildIDIndexMapping(parsedLines, false);
  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIndexByID.size(), itemIndexByID.size());

  Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIndexByID);
  Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIndexByID);

  JavaRDD<Rating> rawRatings = parsedToRatingRDD(parsedLines, bUserIDToIndex, bItemIDToIndex);
  JavaRDD<Rating> aggregatedRatings = aggregateScores(rawRatings, epsilon);

  ALS als = new ALS()
      .setRank(features)
      .setIterations(iterations)
      .setLambda(lambda)
      .setCheckpointInterval(5);
  if (implicit) {
    als = als.setImplicitPrefs(true).setAlpha(alpha);
  }

  RDD<Rating> trainingRDD = aggregatedRatings.rdd();
  trainingRDD.cache();
  MatrixFactorizationModel model = als.run(trainingRDD);
  trainingRDD.unpersist(false);

  // The ID-to-index direction and the parsed input are no longer needed once training is done.
  bUserIDToIndex.unpersist();
  bItemIDToIndex.unpersist();
  parsedLines.unpersist();

  // Writing the model out needs the inverse (index-to-ID) direction.
  Broadcast<Map<Integer,String>> bUserIndexToID =
      sparkContext.broadcast(invertMap(userIndexByID));
  Broadcast<Map<Integer,String>> bItemIndexToID =
      sparkContext.broadcast(invertMap(itemIndexByID));

  PMML pmml = mfModelToPMML(
      model, features, lambda, alpha, epsilon, implicit, logStrength,
      candidatePath, bUserIndexToID, bItemIndexToID);

  unpersist(model);
  bUserIndexToID.unpersist();
  bItemIndexToID.unpersist();

  return pmml;
}
Example #10
Source File: ALSUpdate.java From oryx with Apache License 2.0 | 4 votes |
/**
 * Scores a PMML-described model against test data.
 *
 * <p>When {@code implicit} is set the result is the AUC; otherwise it is the negated RMSE,
 * so that in both cases a larger value is better.
 *
 * @param sparkContext context used for broadcasts and evaluation
 * @param model PMML describing the model to evaluate
 * @param modelParentPath directory the model's feature files are resolved against
 * @param testData raw test lines, parsed with {@code MLFunctions.PARSE_FN}
 * @param trainData not used by this implementation
 * @return AUC for implicit models, {@code -RMSE} otherwise
 */
@Override
public double evaluate(JavaSparkContext sparkContext,
                       PMML model,
                       Path modelParentPath,
                       JavaRDD<String> testData,
                       JavaRDD<String> trainData) {
  JavaRDD<String[]> parsedTestLines = testData.map(MLFunctions.PARSE_FN);
  parsedTestLines.cache();

  Map<String,Integer> userIndexByID = buildIDIndexOneWayMap(model, parsedTestLines, true);
  Map<String,Integer> itemIndexByID = buildIDIndexOneWayMap(model, parsedTestLines, false);
  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIndexByID.size(), itemIndexByID.size());

  Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIndexByID);
  Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIndexByID);

  JavaRDD<Rating> rawTestRatings =
      parsedToRatingRDD(parsedTestLines, bUserIDToIndex, bItemIDToIndex);
  // epsilon is stored in the PMML only for log-strength models.
  double epsilon = logStrength
      ? Double.parseDouble(AppPMMLUtils.getExtensionValue(model, "epsilon"))
      : Double.NaN;
  JavaRDD<Rating> testRatings = aggregateScores(rawTestRatings, epsilon);

  MatrixFactorizationModel mfModel =
      pmmlToMFModel(sparkContext, model, modelParentPath, bUserIDToIndex, bItemIDToIndex);

  parsedTestLines.unpersist();

  double eval;
  if (!implicit) {
    double rmse = Evaluation.rmse(mfModel, testRatings);
    log.info("RMSE: {}", rmse);
    // Negated so that a higher evaluation value is always better.
    eval = -rmse;
  } else {
    double auc = Evaluation.areaUnderCurve(sparkContext, mfModel, testRatings);
    log.info("AUC: {}", auc);
    eval = auc;
  }

  unpersist(mfModel);
  bUserIDToIndex.unpersist();
  bItemIDToIndex.unpersist();

  return eval;
}
Example #11
Source File: ALSUpdate.java From oryx with Apache License 2.0 | 4 votes |
/**
 * Produces the PMML stand-in for a factored matrix model. The matrices themselves are not
 * serialized into PMML; they are saved as files under {@code candidatePath} ("X" for users,
 * "Y" for items) and the PMML only carries Extensions that point at those files, along with
 * the hyperparameters and the known user and item IDs.
 */
private static PMML mfModelToPMML(MatrixFactorizationModel model,
                                  int features,
                                  double lambda,
                                  double alpha,
                                  double epsilon,
                                  boolean implicit,
                                  boolean logStrength,
                                  Path candidatePath,
                                  Broadcast<Map<Integer,String>> bUserIndexToID,
                                  Broadcast<Map<Integer,String>> bItemIndexToID) {

  // Feature vectors are narrowed from double to float before being saved.
  Function<double[],float[]> toFloatArray = doubles -> {
    float[] floats = new float[doubles.length];
    for (int idx = 0; idx < floats.length; idx++) {
      floats[idx] = (float) doubles[idx];
    }
    return floats;
  };

  JavaPairRDD<Integer,float[]> userFeatures =
      massageToIntKey(model.userFeatures()).mapValues(toFloatArray);
  JavaPairRDD<Integer,float[]> itemFeatures =
      massageToIntKey(model.productFeatures()).mapValues(toFloatArray);

  Path userFeaturesPath = new Path(candidatePath, "X");
  saveFeaturesRDD(userFeatures, userFeaturesPath, bUserIndexToID);
  Path itemFeaturesPath = new Path(candidatePath, "Y");
  saveFeaturesRDD(itemFeatures, itemFeaturesPath, bItemIndexToID);

  PMML pmml = PMMLUtils.buildSkeletonPMML();
  AppPMMLUtils.addExtension(pmml, "X", "X/");
  AppPMMLUtils.addExtension(pmml, "Y", "Y/");
  AppPMMLUtils.addExtension(pmml, "features", features);
  AppPMMLUtils.addExtension(pmml, "lambda", lambda);
  AppPMMLUtils.addExtension(pmml, "implicit", implicit);
  // alpha and epsilon are recorded only for the modes that use them.
  if (implicit) {
    AppPMMLUtils.addExtension(pmml, "alpha", alpha);
  }
  AppPMMLUtils.addExtension(pmml, "logStrength", logStrength);
  if (logStrength) {
    AppPMMLUtils.addExtension(pmml, "epsilon", epsilon);
  }

  Map<Integer,String> userIndexToID = bUserIndexToID.value();
  addIDsExtension(pmml, "XIDs", userFeatures, userIndexToID);
  Map<Integer,String> itemIndexToID = bItemIndexToID.value();
  addIDsExtension(pmml, "YIDs", itemFeatures, itemIndexToID);

  return pmml;
}
Example #12
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | 4 votes |
/**
 * Trains an ALS model from the rows of the Cassandra ratings table.
 *
 * @param sparkCtx context used to read the ratings table
 * @param cassandraConnector not used by this implementation
 * @return model trained with the {@code RANK}, {@code ITER} and {@code LAMBDA} settings
 */
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
  CassandraJavaRDD<CassandraRow> ratingRows = javaFunctions(sparkCtx)
      .cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);

  JavaRDD<Rating> ratings = ratingRows.map(row -> new Rating(
      row.getInt(RatingDO.USER_COL),
      row.getInt(RatingDO.PRODUCT_COL),
      row.getDouble(RatingDO.RATING_COL)));

  return ALS.train(JavaRDD.toRDD(ratings), RANK, ITER, LAMBDA);
}
Example #13
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | 4 votes |
/**
 * Predicts a rating for the (user, product) pair of every validation row.
 *
 * @param model trained model
 * @param validationsCassRdd validation rows supplying the user and product columns
 * @return predicted ratings
 */
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
  RDD<Tuple2<Object, Object>> userProducts = JavaRDD.toRDD(
      validationsCassRdd.map(row -> new Tuple2<Object, Object>(
          row.getInt(RatingDO.USER_COL),
          row.getInt(RatingDO.PRODUCT_COL))));

  return model.predict(userProducts).toJavaRDD();
}
Example #14
Source File: ALSUpdate.java From oryx with Apache License 2.0 | 2 votes |
/**
 * Unpersists the user and product feature RDDs of a model, passing {@code false} for the
 * {@code blocking} argument of each call.
 *
 * @param model model whose feature RDDs were persisted
 */
private static void unpersist(MatrixFactorizationModel model) {
  final boolean blocking = false;
  model.userFeatures().unpersist(blocking);
  model.productFeatures().unpersist(blocking);
}
Example #15
Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | votes |
/**
 * Produces a {@link MatrixFactorizationModel}.
 *
 * <p>NOTE(review): the CollabFilterCassandra7/8 implementations shown with this interface read
 * a Cassandra ratings table through {@code sparkCtx} and train with ALS; neither uses
 * {@code cassandraConnector}. Confirm whether other implementations rely on it.
 *
 * @param sparkCtx Spark context made available to the implementation
 * @param cassandraConnector Cassandra connector made available to the implementation
 * @return the trained model
 */
MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector);
Example #16
Source File: ICollabFilterCassandra.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | votes |
/**
 * Produces predicted ratings from a model and a set of validation rows.
 *
 * <p>NOTE(review): the CollabFilterCassandra7/8 implementations shown with this interface map
 * each row to a (user, product) pair and pass the pairs to {@code model.predict}; confirm that
 * other implementations follow the same contract.
 *
 * @param model model used for prediction
 * @param validationsCassRdd Cassandra rows to predict for
 * @return predicted ratings
 */
JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd);