package com.example;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.EnableTransactionManagement;

import java.util.Arrays;

/**
 * Spring Boot entry point for the Spark demo.
 *
 * <p>The class doubles as a {@link CommandLineRunner}: its {@link #run(String...)}
 * callback delegates to {@link SparkIntegrationService#testSparkOperations()}.
 */
@SpringBootApplication
@EnableTransactionManagement
public class SpringSparkDemoApplication implements CommandLineRunner {

    /**
     * Bootstraps the Spring application using this class as the primary source.
     *
     * @param args command-line arguments, forwarded unchanged to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(SpringSparkDemoApplication.class, args);
    }

    /** Service that performs the actual Spark work; injected by Spring. */
    @Autowired
    SparkIntegrationService sparkIntegrationService;

    /**
     * Runs the Spark demo operations.
     *
     * @param args command-line arguments (not used by this method)
     */
    @Override
    public void run(String... args) {
        this.sparkIntegrationService.testSparkOperations();
    }
}

/**
 * Spring configuration that exposes the Spark infrastructure beans:
 * a {@link SparkConf}, a {@link JavaSparkContext} and a {@link SparkSession}.
 *
 * <p>The application name, Spark home and master URI are read from the
 * {@code app.name}, {@code spark.home} and {@code master.uri} properties.
 */
@Configuration
@PropertySource("classpath:application.properties")
class SparkConfig
{

    // Bound to the declared class rather than this.getClass(): Spring may hand out a
    // CGLIB-generated subclass of a @Configuration class, which would otherwise give
    // the logger a mangled proxy name. Static, because one logger per class suffices.
    private static final Logger LOGGER = LoggerFactory.getLogger(SparkConfig.class);

    @Value("${app.name}")
    private String appName;

    @Value("${spark.home}")
    private String sparkHome;

    @Value("${master.uri}")
    private String masterUri;

    /**
     * Builds the Spark configuration from the injected property values.
     *
     * @return a {@link SparkConf} carrying the application name, Spark home and master URI
     */
    @Bean
    public SparkConf sparkConf()
    {
        return new SparkConf()
            .setAppName(appName)
            .setSparkHome(sparkHome)
            .setMaster(masterUri);
    }

    /**
     * Creates the Java-facing Spark context.
     *
     * @param sparkConf the configuration the context is created with
     * @return a new {@link JavaSparkContext}
     */
    @Bean
    public JavaSparkContext javaSparkContext(SparkConf sparkConf)
    {
        return new JavaSparkContext(sparkConf);
    }

    /**
     * Creates (or obtains) the {@link SparkSession} on top of the Java Spark context
     * and logs the Spark version in use.
     *
     * @param sparkConf the configuration applied to the session builder
     * @return the resulting {@link SparkSession}
     */
    @Bean
    public SparkSession spark(SparkConf sparkConf)
    {
        // NOTE(review): this direct call is expected to resolve to the javaSparkContext
        // bean only while the class is a proxied @Configuration (the Spring default);
        // with proxying disabled it would construct a second context.
        SparkSession sparkSession = SparkSession.builder()
            .sparkContext(javaSparkContext(sparkConf).sc())
            .config(sparkConf)
            .getOrCreate();

        LOGGER.info("Using Spark Version {}", sparkSession.version());

        return sparkSession;
    }

}

/**
 * Demo service that loads two employee CSV files through the injected
 * {@link SparkSession} and prints basic facts about them to standard output.
 */
@Service
class SparkIntegrationService {

    /** Spark session used for all reads; injected by Spring. */
    @Autowired
    SparkSession spark;

    /**
     * Reads {@code employees1.csv} and {@code employees2.csv}, then prints, in order:
     * the schema, column names, row count and first five rows of the first file,
     * the row count of the second file, and the row count of their union.
     */
    public void testSparkOperations() {
        final Dataset<Row> firstSet = readCsvWithHeader("./src/main/resources/employees1.csv");

        System.out.println("Schema of the CSV file:");
        firstSet.printSchema();

        System.out.println("Columns in CSV file:");
        final String[] columnNames = firstSet.columns();
        System.out.println(Arrays.toString(columnNames));

        final long firstCount = firstSet.count();
        System.out.println("Total data set 1 count: " + firstCount);

        System.out.println("First 5 rows:");
        // The result is cast because the call is not typed as Row[] on the Java side.
        final Row[] leadingRows = (Row[]) firstSet.head(5);
        System.out.println(Arrays.toString(leadingRows));

        final Dataset<Row> secondSet = readCsvWithHeader("./src/main/resources/employees2.csv");

        final long secondCount = secondSet.count();
        System.out.println("Total data set 2 count: " + secondCount);
        System.out.println();

        final Dataset<Row> combined = firstSet.union(secondSet);
        final long combinedCount = combined.count();
        System.out.println("Total data set union count: " + combinedCount);
    }

    /**
     * Reads a CSV file with the {@code header} option enabled.
     *
     * @param path location of the CSV file
     * @return the file contents as a {@link Dataset} of {@link Row}s
     */
    private Dataset<Row> readCsvWithHeader(String path) {
        return spark.read().option("header", true).csv(path);
    }
}