package org.webdsl.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;

/**
 * <p>
 *   Auto completer class  (Main class) <br/>
 *  (Based on Lucene SpellChecker class).
 * </p>
 *
 * <p>Example Usage:
 *
 * <pre>
 *  AutoCompleter autocompleter = new AutoCompleter(autocompleteIndexDirectory);
 *  // To index a field of a user index:
 *  autocompleter.indexDictionary(new LuceneDictionary(my_lucene_reader, a_field));
 *  // To index a file containing words:
 *  autocompleter.indexDictionary(new PlainTextDictionary(new File("myfile.txt")));
 *  String[] suggestions = autocompleter.suggestSimilar("toComplete", 5);
 * </pre>
 *
 *
 * @version 1.0
 */
public class AutoCompleter implements java.io.Closeable {


  /**
   * Field name for each word in the ngram index.
   */
  public static final String F_WORD = "word";

  private static final int MAX_PREFIX_LENGTH = 10;

  private static final String F_FREQ = "frequency";

  private static final Term F_WORD_TERM = new Term(F_WORD);

  /**
   * the autocomplete index
   */
  // don't modify the directory directly - see #swapSearcher()
  // TODO: why is this package private?
  Directory autoCompleteIndex;

  // don't use this searcher directly - see #swapSearcher()

  private IndexSearcher searcher;
  /*
   * this locks all modifications to the current searcher.
   */

  private final Object searcherLock = new Object();
  /*
   * this lock synchronizes all possible modifications to the
   * current index directory. It should not be possible to try modifying
   * the same index concurrently. Note: Do not acquire the searcher lock
   * before acquiring this lock!
   */
  private final Object modifyCurrentIndexLock = new Object();

  private volatile boolean closed = false;

  /**
   * Use the given directory as an auto completer index with a
   * {@link LevensteinDistance} as the default {@link StringDistance}. The
   * directory is created if it doesn't exist yet.
   *
   * @param autoCompleteIndex
   *          the autocomplete index directory
   * @throws IOException
   *           if autocompleter can not open the directory
   */
  public AutoCompleter(Directory autocompleteIndex) throws IOException {
      setAutoCompleteIndex(autocompleteIndex);
  }


  /**
   * Use a different index as the auto completer index or re-open
   * the existing index if <code>autocompleteIndex</code> is the same value
   * as given in the constructor.
   * @param autocompleteIndexDir the autocomplete directory to use
   * @throws AlreadyClosedException if the Autocompleter is already closed
   * @throws  IOException if autocompleter can not open the directory
   */
  // TODO: we should make this final as it is called in the constructor
  public void setAutoCompleteIndex(Directory autocompleteIndexDir) throws IOException {
    // this could be the same directory as the current autocompleteIndex
    // modifications to the directory should be synchronized
    synchronized (modifyCurrentIndexLock) {
      ensureOpen();
      if (!IndexReader.indexExists(autocompleteIndexDir)) {
          IndexWriter writer = new IndexWriter(autocompleteIndexDir,
            new IndexWriterConfig(Version.LUCENE_CURRENT,
                new WhitespaceAnalyzer(Version.LUCENE_CURRENT)));
          writer.close();
      }
      swapSearcher(autocompleteIndexDir);
    }
  }

  /**
   * Suggest similar words (optionally restricted to a field of an index).
   *
   * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
   * is not the same as the edit distance strategy used to calculate the best
   * matching autocomplete word from the hits that Lucene found, one usually has
   * to retrieve a couple of numSug's in order to get the true best match.
   *
   * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
   * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
   *
   * @param word the word you want a auto complete done on
   * @param numSug the number of suggested words
   * @param ir the indexReader of the user index (can be null see field param)
   * @param field the field of the user index: if field is not null, the suggested
   * words are restricted to the words present in this field.
   * @throws IOException if the underlying index throws an {@link IOException}
   * @throws AlreadyClosedException if the Autocompleter is already closed
   * @return List<String> the sorted list of the suggest words with these 2 criteria:
   * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
   * of the suggest words in the field of the user index
   */
  public String[] suggestSimilar(String word, int numSug) throws IOException {
    // obtainSearcher calls ensureOpen
    final IndexSearcher indexSearcher = obtainSearcher();
    try{
      BooleanQuery query = new BooleanQuery();
      List<String[]> grams = formGrams(word);
      String key;
      for (String[] gramArray : grams) {
          for (int i = 0; i < gramArray.length; i++) {
                key = "start" + gramArray[i].length(); // form key
                add(query, key, gramArray[i]);
              }
      }


      int maxHits = 2 * numSug;

      //First sort on similarity, then on popularity (based on frequency in the source index)
      SortField[] sortFields = {SortField.FIELD_SCORE, new SortField(F_FREQ, SortField.INT, true)};

      ScoreDoc[] hits = indexSearcher.search(query, maxHits, new Sort(sortFields)).scoreDocs;
      //indexSearcher.search(query, null, maxHits).scoreDocs;
      int stop = Math.min(hits.length, maxHits);
      String[] toReturn = new String[stop];

      for (int i = 0; i < stop; i++) {
        toReturn[i] =  indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
      }
      return toReturn;
    } finally {
      releaseSearcher(indexSearcher);
    }
  }

  /**
   * Add a clause to a boolean query.
   */
  private static void add(BooleanQuery q, String name, String value) {
    q.add(new BooleanClause(new TermQuery(new Term(name, value)), BooleanClause.Occur.SHOULD));
  }

  /**
   * Returns at most 3 ngrams for each token (whitespace separated), so 2 typos can be made at the end of
   * each token from the currently typed string.
   * @param text the word to parse
   * @return an list of arrays of all ngrams in the word and note that duplicates are not removed
   */
  private static List<String[]> formGrams(String text) {
    //first split into tokens to match words in phrases
    text = text.toLowerCase(); //search prefixes in lower case!
    String[] tokens = text.split("\\s");
    int len, tokenlen;
    ArrayList<String[]> grams = new ArrayList<String[]>();

    for (String token : tokens) {
        len = 3;
        tokenlen = Math.min(token.length(), MAX_PREFIX_LENGTH);
        if (tokenlen < 3) {
          len = tokenlen;
        }

        String[] res = new String[len];
        for (int i = 0; i < len; i++) {
          res[i] = token.substring(0, tokenlen-i);
        }
        grams.add(res);
    }


    return grams;
  }

  /**
   * Removes all terms from the auto complete index.
   * @throws IOException
   * @throws AlreadyClosedException if the Autocompleter is already closed
   */
  public void clearIndex() throws IOException {
    synchronized (modifyCurrentIndexLock) {
      ensureOpen();
      final Directory dir = this.autoCompleteIndex;
      final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
          Version.LUCENE_CURRENT,
          new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
          .setOpenMode(OpenMode.CREATE));
      writer.close();
      swapSearcher(dir);
    }
  }

  /**
   * Check whether the word exists in the index.
   * @param word
   * @throws IOException
   * @throws AlreadyClosedException if the Autocompleter is already closed
   * @return true if the word exists in the index
   */
  public boolean exist(String word) throws IOException {
    // obtainSearcher calls ensureOpen
    final IndexSearcher indexSearcher = obtainSearcher();
    try{
      return indexSearcher.docFreq(F_WORD_TERM.createTerm(word)) > 0;
    } finally {
      releaseSearcher(indexSearcher);
    }
  }

  /**
   * Indexes the data from the given reader.
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing
 * @param ramMB the max amount or memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
   * @throws AlreadyClosedException if the Autocompleter is already closed
   * @throws IOException
   */
  public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
    synchronized (modifyCurrentIndexLock) {
      ensureOpen();
      final Directory dir = this.autoCompleteIndex;
      final Dictionary dict = new LuceneDictionary(reader, field);
      final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
      IndexSearcher indexSearcher = obtainSearcher();
      final List<IndexReader> readers = new ArrayList<IndexReader>();

      if (searcher.maxDoc() > 0) {
        ReaderUtil.gatherSubReaders(readers, searcher.getIndexReader());
      }

      //clear the index
      writer.deleteAll();

      try {
        Iterator<String> iter = dict.getWordsIterator();

      while (iter.hasNext()) {
          String word = iter.next();

          // ok index the word
          Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
          writer.addDocument(doc);
        }
      } finally {
        releaseSearcher(indexSearcher);
      }
      // close writer
      if (optimize)
        writer.optimize();
      writer.close();
      // also re-open the autocomplete index to see our own changes when the next suggestion
      // is fetched:
      swapSearcher(dir);
    }
  }

  /**
   * Indexes the data from the given {@link Dictionary}.
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing
 * @param ramMB the max amount or memory in MB to use
   * @throws IOException
   */
  public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB) throws IOException {
    indexDictionary(reader, field, mergeFactor, ramMB, true);
  }

  /**
   * Indexes the data from the given {@link Dictionary}.
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
   * @throws IOException
   */
  public final void indexDictionary(IndexReader reader, String field) throws IOException {
    indexDictionary(reader, field, 300, (int)IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
  }

  private static int getMax(int l) {
    if (l > MAX_PREFIX_LENGTH) {
      return MAX_PREFIX_LENGTH;
    }
    return l;
  }

  private static Document createDocument(String text, int freq) {
    Document doc = new Document();
    // the word field is never queried on... its indexed so it can be quickly
    // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
    Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
    f.setOmitTermFreqAndPositions(true);
    f.setOmitNorms(true);
    doc.add(f); // orig term
    NumericField nf = new NumericField(F_FREQ).setIntValue(freq);
    doc.add(nf);
    addGram(text, doc);
    return doc;
  }

  private static void addGram(String text, Document doc) {
    //If phrases are indexed as completions, it is nice to suggest these phrase if a token is matched
    //i.e. the phrase "Best practices in software architecture" is suggested on input "softw..."
    text = text.toLowerCase(); //index prefixes as lower case!
    String[] tokens = text.split("\\s");
    String token, key, gram;
    for (int t = 0; t < tokens.length; t++) {
        token = tokens[t];
        int len = getMax(token.length());
        for (int i = 1; i <= len; i++) {
          key = "start" + i;
          gram = token.substring(0, i);
          doc.add(new Field(key, gram, Field.Store.NO, Field.Index.NOT_ANALYZED));
        }
    }
  }

  private IndexSearcher obtainSearcher() {
    synchronized (searcherLock) {
      ensureOpen();
      searcher.getIndexReader().incRef();
      return searcher;
    }
  }

  private void releaseSearcher(final IndexSearcher aSearcher) throws IOException{
      // don't check if open - always decRef
      // don't decrement the private searcher - could have been swapped
      aSearcher.getIndexReader().decRef();
  }

  private void ensureOpen() {
    if (closed) {
      throw new AlreadyClosedException("Autocompleter has been closed");
    }
  }

  /**
   * Close the IndexSearcher used by this AutoCompleter
   * @throws IOException if the close operation causes an {@link IOException}
   * @throws AlreadyClosedException if the {@link AutoCompleter} is already closed
   */
  public void close() throws IOException {
    synchronized (searcherLock) {
      ensureOpen();
      closed = true;
      if (searcher != null) {
        searcher.close();
      }
      searcher = null;
    }
  }

  private void swapSearcher(final Directory dir) throws IOException {
    /*
     * opening a searcher is possibly very expensive.
     * We rather close it again if the Autocompleter was closed during
     * this operation than block access to the current searcher while opening.
     */
    final IndexSearcher indexSearcher = createSearcher(dir);
    synchronized (searcherLock) {
      if(closed){
        indexSearcher.close();
        throw new AlreadyClosedException("Autocompleter has been closed");
      }
      if (searcher != null) {
        searcher.close();
      }
      // set the autocomplete index in the sync block - ensure consistency.
      searcher = indexSearcher;
      this.autoCompleteIndex = dir;
    }
  }

  /**
   * Creates a new read-only IndexSearcher
   * @param dir the directory used to open the searcher
   * @return a new read-only IndexSearcher
   * @throws IOException f there is a low-level IO error
   */
  // for testing purposes
  IndexSearcher createSearcher(final Directory dir) throws IOException{
    return new IndexSearcher(dir, true);
  }

  /**
   * Returns <code>true</code> if and only if the {@link AutoCompleter} is
   * closed, otherwise <code>false</code>.
   *
   * @return <code>true</code> if and only if the {@link AutoCompleter} is
   *         closed, otherwise <code>false</code>.
   */
  boolean isClosed(){
    return closed;
  }

}