package com.packt.sfjd.ch8;

import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.TypedColumn;
import org.apache.spark.sql.api.java.UDF2;
import org.apache.spark.sql.types.DataTypes;

public class UDFExample {

	public static void main(String[] args) {
		// Windows-specific property, needed if Hadoop is not installed or HADOOP_HOME is not set
		System.setProperty("hadoop.home.dir", "E:\\hadoop");
		
		// Build a SparkSession
		SparkSession sparkSession = SparkSession
				.builder()
				.master("local")
				.config("spark.sql.warehouse.dir", "file:///E:/hadoop/warehouse")
				.appName("UDFExample")
				.getOrCreate();
		Logger rootLogger = LogManager.getRootLogger();
		rootLogger.setLevel(Level.WARN);
		// Read the CSV data
		Dataset<Row> emp_ds = sparkSession.read()
				.format("com.databricks.spark.csv")
				.option("header", "true")
				.option("inferSchema", "true")
				.load("src/main/resources/employee.txt");
	    		
		    UDF2 calcDays = new CalcDaysUDF();
		    // Register the UDF with the SparkSession created above
		    sparkSession.udf().register("calcDays", calcDays, DataTypes.LongType);
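
		    // For comparison only: Spark 2.x also allows registering a Java UDF inline as a
		    // lambda cast to one of the UDF1..UDF22 interfaces. The "strLength" UDF below is a
		    // hypothetical illustration and is not used elsewhere in this example.
		    sparkSession.udf().register("strLength",
		            (org.apache.spark.sql.api.java.UDF1<String, Integer>) s -> s == null ? 0 : s.length(),
		            DataTypes.IntegerType);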
		    
		    emp_ds.createOrReplaceTempView("emp_ds");
		    
		    emp_ds.printSchema();
		    emp_ds.show();
		    
		    sparkSession.sql("select calcDays(hiredate,'dd-MM-yyyy') from emp_ds").show();   
		    // Instantiate the UDAF
		    AverageUDAF calcAvg = new AverageUDAF();
		    // Register the UDAF with the SparkSession
		    sparkSession.udf().register("calcAvg", calcAvg);
		    // Use the UDAF in a SQL query
		    sparkSession.sql("select deptno, calcAvg(salary) from emp_ds group by deptno").show();
		   
		    // Type-safe UDAF (an Aggregator) applied to a strongly typed Dataset
		    TypeSafeUDAF typeSafeUDAF = new TypeSafeUDAF();

		    Dataset<Employee> emf = emp_ds.as(Encoders.bean(Employee.class));
		    emf.printSchema();
		    emf.show();

		    TypedColumn<Employee, Double> averageSalary = typeSafeUDAF.toColumn().name("averageTypeSafe");
		    Dataset<Double> result = emf.select(averageSalary);
		    result.show();
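
		    // For comparison (illustrative only): the untyped DataFrame path for the overall
		    // average that TypeSafeUDAF presumably computes, judging by the variable name above;
		    // it assumes the 'salary' column used in the SQL queries earlier.
		    emf.agg(org.apache.spark.sql.functions.avg("salary")).show();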
		    

	}

}