package edu.uncc.cs.watsonsim.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;

import edu.uncc.cs.watsonsim.Environment;
import edu.uncc.cs.watsonsim.Passage;
import edu.uncc.cs.watsonsim.Question;
import edu.uncc.cs.watsonsim.Score;
import edu.uncc.cs.watsonsim.scorers.Merge;

/**
 * @author Phani Rahul
 */
public class LuceneSearcher extends Searcher {
	private final IndexSearcher lucene;
	
	public LuceneSearcher(Environment env) {
		super(env);
		lucene = env.lucene;
		Score.register("LUCENE_ANSWER_RANK", -1, Merge.Mean);
		Score.register("LUCENE_ANSWER_SCORE", -1, Merge.Mean);
		Score.register("LUCENE_ANSWER_PRESENT", 0.0, Merge.Sum);
	}
	
	/**
	 * Create a Lucene query using the bigrams in the given text
	 * @param text
	 */
	public BooleanQuery queryFromSkipBigrams(String text) {
		BooleanQuery q = new BooleanQuery();
		String prev_word = null;
		for (String word : text.split("\\W+")) {
			if (prev_word != null) {
				PhraseQuery pq = new PhraseQuery();
				pq.setSlop(1);
				pq.add(new Term("text", prev_word));
				pq.add(new Term("text", word));
				q.add(pq, BooleanClause.Occur.SHOULD);
			}
			q.add(new TermQuery(new Term("text", word)), BooleanClause.Occur.SHOULD);
			prev_word = word;
		}
		return q;
	}
	
	
	public List<Passage> query(Question question) {
		List<Passage> results = new ArrayList<>();
		try {
			//ScoreDoc[] hits = env.simpleLuceneQuery(question.text, MAX_RESULTS);
			ScoreDoc[] hits = lucene.search(
					queryFromSkipBigrams(
							question.text
							+ " "
							+ question.getCategory()),
					MAX_RESULTS).scoreDocs;
			// This isn't range based because we need the rank
			for (int i=0; i < hits.length; i++) {
				ScoreDoc s = hits[i];
				Document doc = lucene.doc(s.doc);
				results.add(new edu.uncc.cs.watsonsim.Passage(
						"lucene", 			// Engine
						"",	// Title - filled in by shared db
						"", // Text - filled in by shared db
						doc.get("docno"))   // Reference
						.score("LUCENE_ANSWER_RANK", (double) i)        // Rank
						.score("LUCENE_ANSWER_SCORE", (double) s.score)	// Source
						.score("LUCENE_ANSWER_PRESENT", 1.0)
						);
			}
		} catch (IOException e) {
			System.out.println("Failed to query Lucene. Is the index in the correct location?");
			e.printStackTrace();
		}
		
		// Fill any missing full text from sources
		return fillFromSources(results);
	}

}