package uk.ac.shef.dcs.jate.app;

import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;
import uk.ac.shef.dcs.jate.algorithm.ChiSquare;
import uk.ac.shef.dcs.jate.feature.*;
import uk.ac.shef.dcs.jate.model.JATETerm;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;

public class AppChiSquare extends App {
    private final Logger log = LoggerFactory.getLogger(getClass());
    //top 30% of the terms are considered to be 'frequent'
    private double frequentTermFT = 0.3;

    /**
     * @param args command-line params accepting solr home path, solr core name
     *             <p>
     *             and more optional run-time parameters
     * @see uk.ac.shef.dcs.jate.app.AppParams
     * <p>
     * Chisquare specific setting: frequent term cutoff percentage
     * @see uk.ac.shef.dcs.jate.app.AppParams#CHISQUERE_FREQ_TERM_CUTOFF_PERCENTAGE
     */
    public static void main(String[] args) throws JATEException {
        if (args.length < 1) {
            printHelp();
            System.exit(1);
        }

        String solrHomePath = args[args.length - 2];
        String solrCoreName = args[args.length - 1];

        Map<String, String> params = getParams(args);
        String jatePropertyFile = getJATEProperties(params);
        String corpusDir = getCorpusDir(params);

        List<JATETerm> terms;
        try {
            App app = new AppChiSquare(params);
            if (isCorpusProvided(corpusDir)) {
                app.index(Paths.get(corpusDir), Paths.get(solrHomePath), solrCoreName, jatePropertyFile);
            }

            terms = app.extract(solrHomePath, solrCoreName, jatePropertyFile);

            if (isExport(params)) {
                app.write(terms);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            System.exit(0);
        }
    }

    /**
     * @param initParams initial parameters including pre-filtering and post-filtering parameters
     *                   and chisquare specific parameter
     * @throws JATEException
     * @see uk.ac.shef.dcs.jate.app.AppParams
     */
    public AppChiSquare(Map<String, String> initParams) throws JATEException {
        super(initParams);
        initializeFTParam(initParams);
    }

    /**
     * @param initParams chisquare specific initial parameter
     * @throws JATEException
     * @see uk.ac.shef.dcs.jate.app.AppParams#CHISQUERE_FREQ_TERM_CUTOFF_PERCENTAGE
     */
    private void initializeFTParam(Map<String, String> initParams) throws JATEException {
        //This param is Chi-Square only
        String sFT = initParams.get(AppParams.CHISQUERE_FREQ_TERM_CUTOFF_PERCENTAGE.getParamKey());
        if (sFT != null) {
            try {
                frequentTermFT = Double.parseDouble(sFT);
                if (frequentTermFT > 1.0 || frequentTermFT <= 0.0)
                    throw new JATEException("Frequent Term cutoff percentage ('-ft') is not set correctly! " +
                            "Value must be within (0,1.0]");
            } catch (NumberFormatException nfe) {
                throw new JATEException("Frequent Term cutoff percentage ('-ft') is not set correctly! " +
                        "A decimal value is expected!");
            }
        }

    }

    @Override
    public List<JATETerm> extract(SolrCore core, String jatePropertyFile)
            throws IOException, JATEException {
        JATEProperties properties = getJateProperties(jatePropertyFile);

        return extract(core, properties);

    }

    public List<JATETerm> extract(SolrCore core, JATEProperties properties) throws JATEException {
        log.info("extract terms from core ... ");
        if (core.isClosed()) {
            core.open();
        }
        SolrIndexSearcher searcher = core.getSearcher().get();
//        try {
            FrequencyTermBasedFBMaster ftbb = new FrequencyTermBasedFBMaster(searcher, properties, 0);
            FrequencyTermBased ft = (FrequencyTermBased) ftbb.build();

            //sentence is a context
            FrequencyCtxSentenceBasedFBMaster fcsbb = new FrequencyCtxSentenceBasedFBMaster(searcher, properties, 0);
            FrequencyCtxBased fcs = (FrequencyCtxBased) fcsbb.build();
            FrequencyCtxBased ref_fcs = (FrequencyCtxBased)
                    (new FrequencyCtxBasedCopier(searcher, properties, fcs, ft, frequentTermFT).build());
            //window is a context
            /*FrequencyCtxWindowBasedFBMaster fcsbb = new FrequencyCtxWindowBasedFBMaster(searcher, properties, null, 5, 0);
            FrequencyCtxBased fcsb = (FrequencyCtxBased) fcsbb.build();
            FrequencyCtxBased ref_fcsb = (FrequencyCtxBased)
                    (new FrequencyCtxWindowBasedFBMaster(searcher, properties, fcsb.getMapCtx2TTF().keySet(), 5, 0).build());*/

            CooccurrenceFBMaster cob = new CooccurrenceFBMaster(searcher, properties, ft,
                    this.prefilterMinTTF, fcs, ref_fcs, this.prefilterMinTCF);
            Cooccurrence co = (Cooccurrence) cob.build();

            //feature expected probability for frequent terms
            ChiSquareFrequentTermsFBMaster cf = new ChiSquareFrequentTermsFBMaster(
                    ref_fcs.getMapCtx2TTF(), ref_fcs.getTerm2Ctx(), ft.getCorpusTotal(), properties);
            ChiSquareFrequentTerms cff = (ChiSquareFrequentTerms) cf.build();

            ChiSquare chi = new ChiSquare();
            chi.registerFeature(FrequencyCtxBased.class.getName() + ChiSquare.SUFFIX_TERM, fcs);
            chi.registerFeature(Cooccurrence.class.getName(), co);
            chi.registerFeature(ChiSquareFrequentTerms.class.getName(), cff);

            log.info("start to run chisquare ...");
            List<JATETerm> terms = chi.execute(co.getTerms());
            log.info("complete chisquare statistics for all terms.");

            log.info("post-filtering terms ...");
            terms = cutoff(terms);
            log.info("complete postfiltering of terms.");

            addAdditionalTermInfo(terms, searcher, properties.getSolrFieldNameJATENGramInfo(),
                    properties.getSolrFieldNameID());
            return terms;
//        } finally {
//            try {
//                searcher.close();
//            } catch (IOException e) {
//                log.error(e.toString());
//            }
//        }
    }

}