import org.apache.spark.SparkContext
import org.apache.spark.mllib.fpm.FPGrowth

/**
 * Created by ubuntu on 3/12/16.
 *
 * Small demo application that mines frequent itemsets from a hard-coded
 * list of transactions with MLlib's FPGrowth and prints each itemset
 * together with its frequency.
 */
object SampleFPGrowthApp {

  /**
   * Entry point: builds the sample transactions, runs FPGrowth on them and
   * prints every frequent itemset as `[item1,item2,...], freq`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Each string is one transaction; items are separated by single spaces.
    val transactions = Seq(
      "r z h k p",
      "z y x w v u t s",
      "s x o n r",
      "x z y m t s q e",
      "z",
      "x z y r q t p")
      .map(_.split(" "))

    val sc = new SparkContext("local[2]", "Chapter 5 App")
    try {
      val rdd = sc.parallelize(transactions, 2).cache()

      val model = new FPGrowth()
        .setMinSupport(0.2)
        .setNumPartitions(1)
        .run(rdd)

      model.freqItemsets.collect().foreach { itemset =>
        val items = itemset.items.mkString("[", ",", "]")
        println(s"$items, ${itemset.freq}")
      }
    } finally {
      // Always release the context, even if mining or collecting fails.
      sc.stop()
    }
  }
}