package org.gbif.occurrence.download.hive;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import freemarker.cache.ClassTemplateLoader;
import freemarker.template.Configuration;
import freemarker.template.Template;
import freemarker.template.TemplateException;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Map;
import java.util.TreeMap;

import static org.gbif.occurrence.download.hive.OccurrenceAvroHdfsTableDefinition.avroField;

/**
 * Generates HQL scripts dynamically which are used to create the download HDFS tables, and querying when a user issues
 * a download request.
 * <p/>
 * Rather than generating HQL only at runtime, scripts are generated at build time using a Maven
 * plugin, to aid testing, development and debugging.  Freemarker is used as a templating language
 * to allow rapid development, but the sections which are verbose, and subject to easy typos are controlled
 * by enumerations in code.  The same enumerations are used in many places in the codebase, including the
 * generation of the Hive table columns themselves.
 */
public class GenerateHQL {

  private static final String CREATE_TABLES_DIR = "create-tables/hive-scripts";
  private static final String DOWNLOAD_DIR = "download-workflow/dwca/hive-scripts";
  private static final String SIMPLE_CSV_DOWNLOAD_DIR = "download-workflow/simple-csv/hive-scripts";
  private static final String SIMPLE_AVRO_DOWNLOAD_DIR = "download-workflow/simple-avro/hive-scripts";
  private static final String SIMPLE_WITH_VERBATIM_AVRO_DOWNLOAD_DIR = "download-workflow/simple-with-verbatim-avro/hive-scripts";
  private static final String IUCN_DOWNLOAD_DIR = "download-workflow/iucn/hive-scripts";
  private static final String MAP_OF_LIFE_DOWNLOAD_DIR = "download-workflow/map-of-life/hive-scripts";
  private static final String AVRO_SCHEMAS_DIR = "create-tables/avro-schemas";

  private static final String FIELDS = "fields";

  private static final HiveQueries HIVE_QUERIES = new HiveQueries();
  private static final AvroQueries AVRO_QUERIES = new AvroQueries();
  private static final AvroSchemaQueries AVRO_SCHEMA_QUERIES = new AvroSchemaQueries();
  private static final SimpleAvroSchemaQueries SIMPLE_AVRO_SCHEMA_QUERIES = new SimpleAvroSchemaQueries();

  public static void main(String[] args) {
    try {
      Preconditions.checkState(1 == args.length, "Output path for HQL files is required");
      File outDir = new File(args[0]);
      Preconditions.checkState(outDir.exists() && outDir.isDirectory(), "Output directory must exist");

      // create the sub directories into which we will write
      File createTablesDir = new File(outDir, CREATE_TABLES_DIR);
      File downloadDir = new File(outDir, DOWNLOAD_DIR);
      File simpleCsvDownloadDir = new File(outDir, SIMPLE_CSV_DOWNLOAD_DIR);
      File simpleWithVerbatimAvroDownloadDir = new File(outDir, SIMPLE_WITH_VERBATIM_AVRO_DOWNLOAD_DIR);
      File simpleAvroDownloadDir = new File(outDir, SIMPLE_AVRO_DOWNLOAD_DIR);
      File iucnDownloadDir = new File(outDir, IUCN_DOWNLOAD_DIR);
      File mapOfLifeDownloadDir = new File(outDir, MAP_OF_LIFE_DOWNLOAD_DIR);
      File avroSchemasDir = new File(outDir, AVRO_SCHEMAS_DIR);

      createTablesDir.mkdirs();
      downloadDir.mkdirs();
      simpleCsvDownloadDir.mkdirs();
      simpleAvroDownloadDir.mkdirs();
      simpleWithVerbatimAvroDownloadDir.mkdirs();
      iucnDownloadDir.mkdirs();
      mapOfLifeDownloadDir.mkdirs();
      avroSchemasDir.mkdirs();

      Configuration cfg = new Configuration();
      cfg.setTemplateLoader(new ClassTemplateLoader(GenerateHQL.class, "/templates"));

      generateOccurrenceAvroSchema(avroSchemasDir);
      generateOccurrenceAvroTableHQL(cfg, createTablesDir);

      // generates HQL executed at actual download time (tightly coupled to table definitions above, hence this is
      // co-located)
      generateQueryHQL(cfg, downloadDir);
      generateSimpleCsvQueryHQL(cfg, simpleCsvDownloadDir);
      generateSimpleAvroQueryHQL(cfg, simpleAvroDownloadDir);
      generateSimpleAvroSchema(cfg, simpleAvroDownloadDir.getParentFile());
      generateSimpleWithVerbatimAvroQueryHQL(cfg, simpleWithVerbatimAvroDownloadDir);
      generateSimpleWithVerbatimAvroSchema(cfg, simpleWithVerbatimAvroDownloadDir.getParentFile());
      generateIucnQueryHQL(cfg, iucnDownloadDir);
      generateMapOfLifeQueryHQL(cfg, mapOfLifeDownloadDir);
      generateMapOfLifeSchema(cfg, mapOfLifeDownloadDir.getParentFile());

    } catch (Exception e) {
      // Hard exit for safety, and since this is used in build pipelines, any generation error could have
      // catastrophic effects - e.g. partially complete scripts being run, and resulting in inconsistent
      // data.
      System.err.println("*** Aborting JVM ***");
      System.err.println("Unexpected error building the templated HQL files.  "
                         + "Exiting JVM as a precaution, after dumping technical details.");
      e.printStackTrace();
      System.exit(-1);
    }

  }

  /**
   * Generates HQL which is used to create the Hive table, and creates an HDFS equivalent.
   */
  private static void generateOccurrenceAvroTableHQL(Configuration cfg, File outDir) throws IOException, TemplateException {

    try (FileWriter out = new FileWriter(new File(outDir, "create-occurrence-avro.q"))) {
      Template template = cfg.getTemplate("configure/create-occurrence-avro.ftl");
      Map<String, Object> data = ImmutableMap.of(FIELDS, OccurrenceHDFSTableDefinition.definition());
      template.process(data, out);
    }
  }

  private static void generateOccurrenceAvroSchema(File outDir) throws IOException {
    try (FileWriter out = new FileWriter(new File(outDir, "occurrence-hdfs-record.avsc"))) {
      out.write(OccurrenceAvroHdfsTableDefinition.avroDefinition().toString(Boolean.TRUE));
    }
  }

  /**
   * Generates the Hive query file used for DwCA downloads.
   */
  private static void generateQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException {
    try (FileWriter out = new FileWriter(new File(outDir, "execute-query.q"))) {
      Template template = cfg.getTemplate("download/execute-query.ftl");
      Map<String, Object> data = ImmutableMap.of(
        "verbatimFields", HIVE_QUERIES.selectVerbatimFields().values(),
        "interpretedFields", HIVE_QUERIES.selectInterpretedFields(false).values(),
        "initializedInterpretedFields", HIVE_QUERIES.selectInterpretedFields(true).values()
      );
      template.process(data, out);
    }
  }

  /**
   * Generates the Hive query file used for CSV downloads.
   */
  private static void generateSimpleCsvQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException {
    try (FileWriter out = new FileWriter(new File(outDir, "execute-simple-csv-query.q"))) {
      Template template = cfg.getTemplate("simple-csv-download/execute-simple-csv-query.ftl");

      Map<String, Object> data = ImmutableMap.of(FIELDS, HIVE_QUERIES.selectSimpleDownloadFields(true).values());
      template.process(data, out);
    }
  }

  /**
   * Generates the schema file used for simple AVRO downloads.
   */
  private static void generateSimpleAvroSchema(Configuration cfg, File outDir) throws IOException {
    try (FileWriter out = new FileWriter(new File(outDir, "simple-occurrence.avsc"))) {

      Map<String, InitializableField> fields = SIMPLE_AVRO_SCHEMA_QUERIES.selectSimpleDownloadFields(true);

      SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder
        .record("SimpleOccurrence")
        .namespace("org.gbif.occurrence.download.avro").fields();
      fields.values().forEach(initializableField -> avroField(builder, initializableField));
      Schema schema = builder.endRecord();

      out.write(schema.toString(true));
    }
  }

  /**
   * Generates the Hive query file used for simple AVRO downloads.
   */
  private static void generateSimpleAvroQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException {
    try (FileWriter out = new FileWriter(new File(outDir, "execute-simple-avro-query.q"))) {
      Template template = cfg.getTemplate("simple-avro-download/execute-simple-avro-query.ftl");
      Map<String, Object> data = ImmutableMap.of(FIELDS, AVRO_QUERIES.selectSimpleDownloadFields(true).values());
      template.process(data, out);
    }
  }

  /**
   * Generates the Hive query file used for simple with verbatim AVRO downloads.
   */
  private static void generateSimpleWithVerbatimAvroQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException {
    try (FileWriter out = new FileWriter(new File(outDir, "execute-simple-with-verbatim-avro-query.q"))) {
      Template template = cfg.getTemplate("simple-with-verbatim-avro-download/execute-simple-with-verbatim-avro-query.ftl");

      Map<String, InitializableField> simpleFields = AVRO_QUERIES.selectSimpleWithVerbatimDownloadFields(true);
      Map<String, InitializableField> verbatimFields = new TreeMap(AVRO_QUERIES.selectVerbatimFields());

      // Omit any verbatim fields present in the simple download.
      for (String field : simpleFields.keySet()) {
        verbatimFields.remove(field);
      }

      Map<String, Object> data = ImmutableMap.of(
        "simpleFields", simpleFields,
        "verbatimFields", verbatimFields
      );
      template.process(data, out);
    }
  }

  /**
   * Generates the schema used for simple with verbatim AVRO downloads.
   */
  private static void generateSimpleWithVerbatimAvroSchema(Configuration cfg, File outDir) throws IOException {
    try (FileWriter out = new FileWriter(new File(outDir, "simple-with-verbatim-occurrence.avsc"))) {

      Map<String, InitializableField> simpleFields = AVRO_SCHEMA_QUERIES.selectSimpleWithVerbatimDownloadFields(true);
      Map<String, InitializableField> verbatimFields = new TreeMap(AVRO_SCHEMA_QUERIES.selectVerbatimFields());

      // Omit any verbatim fields present in the simple download.
      for (String field : simpleFields.keySet()) {
        verbatimFields.remove(field);
      }

      SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder
        .record("SimpleWithVerbatimOccurrence")
        .namespace("org.gbif.occurrence.download.avro").fields();
      simpleFields.values().forEach(initializableField -> avroField(builder, initializableField));
      verbatimFields.values().forEach(initializableField -> avroField(builder, initializableField));
      Schema schema = builder.endRecord();

      out.write(schema.toString(true));
    }
  }

  /**
   * Generates the Hive query file used for IUCN's custom format downloads.
   */
  private static void generateIucnQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException {
    try (FileWriter out = new FileWriter(new File(outDir, "execute-iucn-query.q"))) {
      Template template = cfg.getTemplate("iucn-download/execute-iucn-query.ftl");
      Map<String, Object> data = ImmutableMap.of(
        "verbatimFields", AVRO_QUERIES.selectVerbatimFields(),
        "interpretedFields", AVRO_QUERIES.selectInterpretedFields(true),
        "internalFields", AVRO_QUERIES.selectInternalFields(true)
      );
      template.process(data, out);
    }
  }

  /**
   * Generates the AVRO schema for Map Of Life's custom format downloads.
   */
  private static void generateMapOfLifeSchema(Configuration cfg, File outDir) throws IOException {
    try (FileWriter out = new FileWriter(new File(outDir, "map-of-life.avsc"))) {

      Map<String, InitializableField> fields = SIMPLE_AVRO_SCHEMA_QUERIES.selectGroupedDownloadFields(MapOfLifeDownloadDefinition.MAP_OF_LIFE_DOWNLOAD_TERMS, true);

      SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder
        .record("MapOfLife")
        .namespace("org.gbif.occurrence.download.avro").fields();
      fields.values().forEach(initializableField -> avroField(builder, initializableField));
      Schema schema = builder.endRecord();

      out.write(schema.toString(true));
    }
  }

  /**
   * Generates the Hive query file used for Map Of Life's custom format downloads.
   */
  private static void generateMapOfLifeQueryHQL(Configuration cfg, File outDir) throws IOException, TemplateException {
    //AVRO_QUERIES.selectVerbatimFields().keySet().stream().forEach(System.out::println);
    //AVRO_QUERIES.selectInterpretedFields(true).keySet().stream().forEach(System.out::println);
    //AVRO_QUERIES.selectInternalFields(true).keySet().stream().forEach(System.out::println);
    try (FileWriter out = new FileWriter(new File(outDir, "execute-map-of-life-query.q"))) {
      Template template = cfg.getTemplate("map-of-life-download/execute-map-of-life-query.ftl");
      Map<String, Object> data = ImmutableMap.of(
        "fields", AVRO_QUERIES.selectGroupedDownloadFields(MapOfLifeDownloadDefinition.MAP_OF_LIFE_DOWNLOAD_TERMS, true)
      );
      template.process(data, out);
    }
  }
}