package com.github.jongwook

import org.apache.spark.mllib.recommendation.Rating
import org.scalatest._

import scala.io.Source

object MovieLensLoader {

  /** Name of the bundled ratings file, looked up on the classpath via `getClass.getResource`. */
  private val ResourceName = "u.data"

  /**
   * Loads the ml-100k dataset from the `u.data` classpath resource.
   *
   * Every line is expected to hold exactly four tab-separated fields:
   * user id, item id, rating and timestamp. The timestamp is discarded.
   *
   * @return one [[Rating]] per line of the resource, in file order
   * @throws java.io.FileNotFoundException if the resource is not on the classpath
   * @throws IllegalArgumentException      if a line does not have four tab-separated fields
   * @throws NumberFormatException         if a user, item or rating field is not numeric
   */
  def load(): Seq[Rating] = {
    // getResource returns null for a missing resource; fail with a readable message
    // instead of a bare NullPointerException on openStream().
    val url = Option(getClass.getResource(ResourceName)).getOrElse {
      throw new java.io.FileNotFoundException(
        s"Resource '$ResourceName' was not found on the classpath")
    }

    val input = url.openStream()
    try {
      // Decode with an explicit charset so the result does not depend on the platform default.
      Source.fromInputStream(input, "UTF-8")
        .getLines()
        .zipWithIndex
        .map { case (line, index) =>
          line.split("\t") match {
            case Array(user, item, rating, _) =>
              Rating(user.toInt, item.toInt, rating.toDouble)
            case _ =>
              throw new IllegalArgumentException(
                s"Malformed record at $ResourceName line ${index + 1}: '$line'")
          }
        }
        .toVector // the iterator is lazy: materialize everything before the stream is closed
    } finally {
      input.close()
    }
  }
}

/** Checks that the bundled ml-100k data loads completely and that ratings span 1.0 to 5.0. */
class MovieLensLoader extends FlatSpec with Matchers {

  "MovieLens Loader" should "load the ml-100k data" in {
    val data = MovieLensLoader.load()

    data.size should be (100000)
    data.map(_.rating).max should be (5.0)
    data.map(_.rating).min should be (1.0)
  }
}