package lucene4ir.indexer;

import lucene4ir.Lucene4IRConstants;
import lucene4ir.utils.TokenAnalyzerMaker;
import org.apache.commons.compress.compressors.z.ZCompressorInputStream;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.*;
import java.nio.file.Paths;
import java.util.zip.GZIPInputStream;

/**
 * Created by leifos on 21/08/2016.
 * Edited by kojayboy on 16/08/2017.
 * Edited by Leifos 10/9/2017
 * Added extra method openDocumentFile that can handle different input file types
 * i.e. compressed (gz, etc) and creates the appropriate input reader
 * probably should re-factor class to provide a templated method with the BufferredReader to process for each file
 * and not the file itself.
 */
public class DocumentIndexer {

    protected boolean indexPositions;
    public IndexWriter writer;
    public Analyzer analyzer;

    public DocumentIndexer(){};

    public DocumentIndexer(String indexPath, String tokenFilterFile, boolean positional){
        writer = null;
        analyzer = Lucene4IRConstants.ANALYZER;
        indexPositions=positional;

        if (tokenFilterFile != null){
            TokenAnalyzerMaker tam = new TokenAnalyzerMaker();
            analyzer = tam.createAnalyzer(tokenFilterFile);
        }
        createWriter(indexPath);
    }


    public void createWriter(String indexPath){
        /*
        The indexPath specifies where to create the index
         */

        // I am can imagine that there are lots of ways to create indexers -
        // We could add in some parameters to customize its creation

        try {
            Directory dir = FSDirectory.open(Paths.get(indexPath));
            System.out.println("Indexing to directory '" + indexPath + "'...");

            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            writer = new IndexWriter(dir, iwc);

        } catch (IOException e){
            e.printStackTrace();
            System.exit(1);
        }
    }

    public void addDocumentToIndex(Document doc){
        try {
            writer.addDocument(doc);
        } catch (IOException e){
            e.printStackTrace();
            System.exit(1);
        }
    }

    public void indexDocumentsFromFile(String filename){
        /* to be implemented in sub classess*/
    };

    protected BufferedReader openDocumentFile(String filename){
        BufferedReader br = null;
        try {
            if(filename.endsWith(".gz")) {
                InputStream fileStream = new FileInputStream(filename);
                InputStream gzipStream = new GZIPInputStream(fileStream);
                Reader decoder = new InputStreamReader(gzipStream, "UTF-8");
                br = new BufferedReader(decoder);
            }
            else
            {
                // For the weirdness that is TREC collections.
                if (filename.endsWith(".Z") || filename.endsWith(".0Z") || filename.endsWith(".1Z") || filename.endsWith(".2Z")) {
                    InputStream fileStream = new FileInputStream(filename);
                    //InputStream zipStream = new ZCompressorInputStream(fileStream);
                    ZCompressorInputStream zipStream = new ZCompressorInputStream(fileStream);
                    Reader decoder = new InputStreamReader(zipStream, "UTF-8");
                    br = new BufferedReader(decoder);
                }
                else
                    br = new BufferedReader(new FileReader(filename));
            }




        } catch (Exception e){
            e.printStackTrace();
            System.exit(1);
        }
        return br;
    }


    public void finished(){
        try {
            if (writer != null){
                writer.close();
            }
        } catch (IOException e){
            e.printStackTrace();
            System.exit(1);
        }
    }

}