package examples

import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.rdd.RDD

/**
 * Test code for accumulators.
 * Use cases for accumulators are mostly as counters or for diagnostic 
 * information.
 * For more details visit, 
 * http://spark.apache.org/docs/latest/programming-guide.html#understanding-closures-a-nameclosureslinka
 *
 */
object TestAccumulators {
	def main(args: Array[String]): Unit = {

		val sc = new SparkContext(new SparkConf().setAppName("TestAccumulatorsJob"))

		val logRDD = sc.textFile("file:/media/linux-1/spark-dev/data/sample.log")

		usingAccumulators(sc, logRDD)

		usingRDDTransformations(sc, logRDD)
	}

	def usingAccumulators(sc: SparkContext, rdd: RDD[String]): Unit = {
		val errorLines = sc.accumulator(0, "Error_Logs")
		val infoLines = sc.accumulator(0, "Info_Logs")
		val warnLines = sc.accumulator(0, "Warn_Logs")
		val totalLines = sc.accumulator(0, "Total_Lines")

		/* It is recommended to use accumulators inside actions only. This guarantees 
		 * that the update is applied only once in spite of job restarts.
		 * For accumulators inside transformations, the lineage may be recomputed 
		 * several times, hence Spark does not recommended accumulators inside 
		 * transformations.
		 * 
		 * The code below uses accumulators inside an action.
		 */
		rdd.foreach { line =>
			if (line.length() > 0) totalLines += 1
			if (line.startsWith("error:")) errorLines += 1
			else if (line.startsWith("info:")) infoLines += 1
			else if (line.startsWith("warn:")) warnLines += 1
		}

		println(s">>> [Using Accumulators] Total: ${totalLines.value}, Error: ${errorLines.value}, Warnings: ${warnLines.value}, Info: ${infoLines.value}")
	}

	def usingRDDTransformations(sc: SparkContext, rdd: RDD[String]): Unit = {
		val errorLines = rdd.filter(_.startsWith("error:")).count()
		val infoLines = rdd.filter(_.startsWith("info:")).count()
		val warnLines = rdd.filter(_.startsWith("warn:")).count()

		println(s">>> [Using RDD Transformations] Error: $errorLines, Warnings: $warnLines, Info: $infoLines")
	}
}