package com.tomekl007.chapter_5 import com.tomekl007.UserTransaction import org.apache.spark.SparkContext import org.apache.spark.sql.SparkSession import org.scalatest.FunSuite import org.scalatest.Matchers._ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer class TransformationsOnPairs extends FunSuite { val spark: SparkContext = SparkSession.builder().master("local[2]").getOrCreate().sparkContext test("should use transformation on k/v pair") { //given val keysWithValuesList = Array( UserTransaction("A", 100), UserTransaction("B", 4), UserTransaction("A", 100001), UserTransaction("B", 10), UserTransaction("C", 10) ) val data = spark.parallelize(keysWithValuesList) val keyed = data.keyBy(_.userId) //when val counted = keyed.countByKey() // keyed.combineByKey() // keyed.aggregateByKey() // keyed.foldByKey() // keyed.groupByKey() //then counted should contain theSameElementsAs Map("B" -> 2, "A" -> 2, "C" -> 1) } }