/*
	Copyright 2017 Digital Learning Sciences (DLS) at the
	University Corporation for Atmospheric Research (UCAR),
	P.O. Box 3000, Boulder, CO 80307

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
*/
package org.dlese.dpc.index.queryParser;

import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import java.util.*;
import java.text.SimpleDateFormat;
import java.io.*;
import java.net.URL;

import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.Element;
import org.dom4j.Attribute;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

import org.dlese.dpc.index.reader.*;
import org.dlese.dpc.xml.Dom4jUtils;

/**
 *  Creates a Lucene Query from an XML representation of a query. See <a
 *  href="../../../../../javadoc-includes/XMLQueryParser-sample.xml">sample Query XML file</a>. Currently
 *  supports term, phrase, boolean and Lucene query syntax formatted queries, with the option to set the boost
 *  factor assigned to any query (default boost is 1.0). If a Lucene query syntax based query may be found in
 *  the Query XML, then a Lucene QueryParser must be supplied; otherwise it may be omitted.
 *
 * @author    John Weatherley
 * @see       org.dlese.dpc.index.VirtualSearchFieldMapper
 */
public class XMLQueryParser {
	private boolean debug = true;


	/**
	 *  Renders the given XML Query as a Lucene query String.
	 *
	 * @param  queryXml       The XML Query, as a String
	 * @param  queryParser    QueryParser used to parse Lucene syntax based queries and tokenize the text
	 * @return                The <code>toString()</code> form of the resulting Lucene Query, or an empty
	 *      String if no Query was produced
	 * @exception  Exception  If the XML Query could not be converted
	 */
	public static String getQueryString(String queryXml, QueryParser queryParser) throws Exception {
		Query luceneQuery = getLuceneQuery(queryXml, queryParser);
		// A null Query is reported to the caller as an empty query String
		return (luceneQuery == null) ? "" : luceneQuery.toString();
	}


	/**
	 *  Gets the Lucene Query representation for the XML Query stored in the given file.
	 *
	 * @param  queryXml       A file containing an XML Query
	 * @param  queryParser    QueryParser used to parse Lucene syntax based queries and tokenize the text
	 * @return                The Lucene Query object representation of this XML Query
	 * @exception  Exception  If error
	 */
	public static Query getLuceneQuery(File queryXml, QueryParser queryParser) throws Exception {
		// Let File build the URL: concatenating "file://" with the absolute path yields a malformed or
		// unescaped URL for paths containing spaces, '#', '%' or a Windows drive letter.
		URL queryUrl = queryXml.toURI().toURL();
		return doGetLuceneQuery(Dom4jUtils.getXmlDocument(queryUrl), queryParser);
	}


	/**
	 *  Gets the Lucene Query representation for the XML Query located at the given URL.
	 *
	 * @param  queryXml       A URL to a file containing an XML Query
	 * @param  queryParser    QueryParser used to parse Lucene syntax based queries and tokenize the text
	 * @return                The Lucene Query object representation of this XML Query
	 * @exception  Exception  If error
	 */
	public static Query getLuceneQuery(URL queryXml, QueryParser queryParser) throws Exception {
		// Load the XML first, then hand the parsed document to the shared conversion routine
		Document queryDocument = Dom4jUtils.getXmlDocument(queryXml);
		return doGetLuceneQuery(queryDocument, queryParser);
	}


	/**
	 *  Gets the Lucene Query representation for an XML Query supplied as a String.
	 *
	 * @param  queryXml       An XML Query
	 * @param  queryParser    QueryParser used to parse Lucene syntax based queries and tokenize the text
	 * @return                The Lucene Query object representation of this XML Query
	 * @exception  Exception  If error
	 */
	public static Query getLuceneQuery(String queryXml, QueryParser queryParser) throws Exception {
		// Load the XML first, then hand the parsed document to the shared conversion routine
		Document queryDocument = Dom4jUtils.getXmlDocument(queryXml);
		return doGetLuceneQuery(queryDocument, queryParser);
	}


	/**
	 *  Converts a parsed XML Query document into a Lucene Query. The root &lt;Query&gt; element must contain
	 *  exactly one child element, which is the query that gets converted.
	 *
	 * @param  xmlDoc         The dom4j Document holding the XML Query
	 * @param  queryParser    QueryParser used to parse Lucene syntax based queries and tokenize the text
	 * @return                The Lucene Query object representation of this XML Query
	 * @exception  Exception  If &lt;Query&gt; has no child element, has more than one, or the child can not
	 *      be converted
	 */
	private static Query doGetLuceneQuery(Document xmlDoc, QueryParser queryParser) throws Exception {
		List children = xmlDoc.selectNodes("/Query/*");
		int numChildren = (children == null) ? 0 : children.size();

		if (numChildren == 0)
			throw new Exception("Error parsing Query: root element '<Query>' is empty or missing.");
		if (numChildren > 1)
			throw new Exception("Error parsing Query: '<Query>' may contain only 1 child element but " + numChildren + " were found.");

		Element onlyChild = (Element) children.get(0);
		return getLuceneQuery(onlyChild, queryParser);
	}


	/**
	 *  Gets the Lucene Query representation for a single top-level query element (&lt;booleanQuery&gt;,
	 *  &lt;textQuery&gt; or &lt;luceneQuery&gt;).
	 *
	 * @param  queryElement   A dom4j representation of the query element
	 * @param  queryParser    QueryParser used to parse Lucene syntax based queries and tokenize the text
	 * @return                The Lucene Query object representation of this XML Query; may be null for a
	 *      &lt;textQuery&gt; whose text is blank
	 * @exception  Exception  If the element carries an excludeOrRequire attribute, has an unsupported name,
	 *      or can not be converted
	 */
	public static Query getLuceneQuery(Element queryElement, QueryParser queryParser) throws Exception {
		// excludeOrRequire only has meaning for a clause nested inside a <booleanQuery>
		if (queryElement.attributeValue("excludeOrRequire") != null)
			throw new Exception("Error parsing document: attribute excludeOrRequire may only be used when the query is enclosed in an encompassing <booleanQuery>. Error found at: " + queryElement.getUniquePath());

		String elementName = queryElement.getName();

		if (elementName.equals("booleanQuery"))
			return makeBooleanQuery(queryElement, queryParser);

		// Both leaf query kinds are handled by the same builder
		if (elementName.equals("textQuery") || elementName.equals("luceneQuery"))
			return makeLuceneQuery(queryElement, queryParser);

		throw new Exception("Error parsing document: invalid element name '<" + queryElement.getName() + ">' at " + queryElement.getUniquePath());
	}



	// ------------------- Methods that parse the query  XML to create Lucene Queries ------------------

	/**
	 *  Builds a Lucene BooleanQuery from a &lt;booleanQuery&gt; element. The element's 'type' attribute ('AND'
	 *  or 'OR', case-insensitive) determines how each child clause is combined: AND makes a clause required,
	 *  OR makes it optional. An individual child may override this with its own excludeOrRequire attribute
	 *  ('exclude', 'require' or 'neither'); the override applies to that child only. Child queries that
	 *  evaluate to null are skipped. Any 'boost' attribute on the element is applied to the result.
	 *
	 * @param  booleanQueryElement  The &lt;booleanQuery&gt; element
	 * @param  queryParser          QueryParser used to parse Lucene syntax based queries and tokenize the text
	 * @return                      The resulting BooleanQuery
	 * @exception  Exception        If the 'type' or an excludeOrRequire attribute is missing or invalid, or a
	 *      child query can not be converted
	 */
	private static Query makeBooleanQuery(Element booleanQueryElement, QueryParser queryParser) throws Exception {
		String operator = booleanQueryElement.valueOf("@type");

		// How a child clause is combined when it does not say otherwise
		BooleanClause.Occur defaultOccur;
		if (operator == null || operator.trim().length() == 0)
			throw new Exception("Error parsing document: element <booleanQuery> must contain an attribite named 'type' that contains the value 'AND' or 'OR'. Error found at " + booleanQueryElement.getUniquePath());
		else if (operator.equalsIgnoreCase("OR"))
			defaultOccur = BooleanClause.Occur.SHOULD;
		else if (operator.equalsIgnoreCase("AND"))
			defaultOccur = BooleanClause.Occur.MUST;
		else
			throw new Exception("Error parsing document: element <booleanQuery> must contain an attribite named 'type' that contains the value 'AND' or 'OR' but value '" + operator + "' was found. Error found at " + booleanQueryElement.getUniquePath());

		BooleanQuery booleanQuery = new BooleanQuery();

		// iterate through child elements of booleanClause
		for (Iterator i = booleanQueryElement.elementIterator(); i.hasNext(); ) {
			Element element = (Element) i.next();

			// Start from the operator default for every child, so that an override on one child
			// never carries over to the siblings that follow it
			BooleanClause.Occur occur = defaultOccur;

			// Exclude from results or require (overrides the boolean operator for this child only)?
			String excludeOrRequire = element.attributeValue("excludeOrRequire");
			if (excludeOrRequire != null) {
				excludeOrRequire = excludeOrRequire.trim();
				if (excludeOrRequire.equalsIgnoreCase("exclude"))
					occur = BooleanClause.Occur.MUST_NOT;
				else if (excludeOrRequire.equalsIgnoreCase("require"))
					occur = BooleanClause.Occur.MUST;
				else if (excludeOrRequire.equalsIgnoreCase("neither"))
					occur = BooleanClause.Occur.SHOULD;
				else {
					// Report the path of the child that carries the bad attribute
					throw new Exception("Error parsing document: the value of attribute excludeOrRequire must be one of 'exclude', 'require' or 'neither' but '"
						 + excludeOrRequire + "' was found at " + element.getUniquePath());
				}
			}

			// Nested boolean queries recurse; every other element is treated as a leaf query
			Query query;
			if (element.getName().equals("booleanQuery"))
				query = makeBooleanQuery(element, queryParser);
			else
				query = makeLuceneQuery(element, queryParser);

			if (query != null)
				booleanQuery.add(query, occur);
		}

		return applyBoost(booleanQueryElement, booleanQuery);
	}


	/**
	 *  Builds a Lucene Query from a leaf query element, either &lt;textQuery&gt; or &lt;luceneQuery&gt;.
	 *  <p>
	 *
	 *  A &lt;textQuery&gt; is converted according to its 'type' attribute: 'matchAllTerms' and 'matchAnyTerm'
	 *  produce a BooleanQuery over the analyzed terms, 'matchPhrase' produces a PhraseQuery over the analyzed
	 *  terms, and 'matchKeyword' produces a TermQuery over the text exactly as given. When the 'field'
	 *  attribute is empty the QueryParser's default field is used. A &lt;luceneQuery&gt; has its text parsed
	 *  by the QueryParser. Any 'boost' attribute on the element is applied to the result.
	 *
	 * @param  luceneQueryElement  The &lt;textQuery&gt; or &lt;luceneQuery&gt; element
	 * @param  queryParser         QueryParser used to parse Lucene syntax based queries and tokenize the text;
	 *      may be null only for a 'matchKeyword' &lt;textQuery&gt; that names its field
	 * @return                     The resulting Query, or null if the element is a &lt;textQuery&gt; whose text
	 *      is blank
	 * @exception  Exception       If the element name or 'type' is not supported, a needed QueryParser is
	 *      null, or the Lucene query syntax is invalid
	 */
	private static Query makeLuceneQuery(Element luceneQueryElement, QueryParser queryParser) throws Exception {
		Query query;

		// Handle textQuery:
		if (luceneQueryElement.getName().equals("textQuery")) {
			String type = luceneQueryElement.valueOf("@type");
			String field = luceneQueryElement.valueOf("@field");
			String text = luceneQueryElement.getText();

			// A blank textQuery contributes nothing; callers skip null queries
			if (text == null || text.trim().length() == 0)
				return null;
			if (type == null || type.trim().length() == 0)
				throw new Exception("Error parsing document: element <textQuery> has empty or missing attribute 'type'. Error found at " + luceneQueryElement.getUniquePath());

			if (field == null || field.trim().length() == 0) {
				// Fail with a descriptive message instead of a bare NullPointerException
				if (queryParser == null)
					throw new Exception("Error parsing document: element <textQuery> has empty or missing attribute 'field' and the Lucene queryParser is null, so no default field is available. Error found at " + luceneQueryElement.getUniquePath());
				field = queryParser.getField();
			}

			if (type.equals("matchAllTerms") || type.equals("matchAnyTerm")) {
				BooleanClause.Occur occur = type.equals("matchAnyTerm") ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
				BooleanQuery booleanQuery = new BooleanQuery();
				for (String term : analyzeTerms(luceneQueryElement, queryParser, field, text))
					booleanQuery.add(new TermQuery(new Term(field, term)), occur);
				query = booleanQuery;
			}
			else if (type.equals("matchPhrase")) {
				PhraseQuery phraseQuery = new PhraseQuery();
				for (String term : analyzeTerms(luceneQueryElement, queryParser, field, text))
					phraseQuery.add(new Term(field, term));
				query = phraseQuery;
			}
			else if (type.equals("matchKeyword")) {
				// The text is used verbatim (not analyzed and not trimmed)
				query = new TermQuery(new Term(field, text));
			}
			// Add support for RangeQuery and others here later...
			else
				throw new Exception("Error parsing document: element <textQuery> type attribute was found to be '" + type + "' but must be of one of 'matchAllTerms', 'matchAnyTerm', 'matchPhrase' or 'matchKeyword'. Error found at " + luceneQueryElement.getUniquePath());
		}

		// Handle luceneQuery:
		else if (luceneQueryElement.getName().equals("luceneQuery")) {
			String queryString = luceneQueryElement.getText();
			if (queryString == null)
				throw new Exception("Error parsing document: element <luceneQuery> is missing the attribute 'query'. Error found at " + luceneQueryElement.getUniquePath());
			if (queryParser == null)
				throw new Exception("Error parsing document: <luceneQuery> element found at " + luceneQueryElement.getUniquePath() + " but the Lucene queryParser is null");

			synchronized (queryParser) {
				try {
					query = queryParser.parse(queryString);
				} catch (ParseException pe) {
					// Keep the ParseException as the cause so the original stack trace is not lost
					throw new Exception("These was a Lucene query syntax error found at " + luceneQueryElement.getUniquePath() + ". Error was: " + pe.getMessage(), pe);
				}
			}
		}
		else
			throw new Exception("Error parsing document: invalid element name '<" + luceneQueryElement.getName() + ">'. Error found at " + luceneQueryElement.getUniquePath());

		return applyBoost(luceneQueryElement, query);
	}


	/**
	 *  Runs the text through the QueryParser's Analyzer for the given field and collects the term text of
	 *  each token, in stream order. The token stream is always closed, even if analysis fails.
	 *
	 * @param  textQueryElement  The &lt;textQuery&gt; element being converted, used for error reporting
	 * @param  queryParser       QueryParser whose Analyzer tokenizes the text
	 * @param  field             The field the text is analyzed for
	 * @param  text              The text to tokenize
	 * @return                   The term text of each token read from the stream
	 * @exception  Exception     If the QueryParser is null or the text can not be analyzed
	 */
	private static List<String> analyzeTerms(Element textQueryElement, QueryParser queryParser, String field, String text) throws Exception {
		if (queryParser == null)
			throw new Exception("Error parsing document: <textQuery> element found at " + textQueryElement.getUniquePath() + " requires an Analyzer to tokenize its text but the Lucene queryParser is null");

		List<String> terms = new ArrayList<String>();
		synchronized (queryParser) {
			TokenStream tokenStream = queryParser.getAnalyzer().tokenStream(field, new StringReader(text));
			try {
				tokenStream.reset();

				// NOTE(review): the first pass reads the token attribute before incrementToken() has been
				// called; loop shape kept as-is - confirm against the Lucene version in use.
				do {
					Token token = tokenStream.getAttribute(new Token().getClass());
					if (token != null)
						terms.add(token.term());
				} while (tokenStream.incrementToken());

				tokenStream.end();
			} finally {
				tokenStream.close();
			}
		}
		return terms;
	}


	/**
	 *  Applies the boost factor given in the element's 'boost' attribute, if any, to the query.
	 *
	 * @param  queryElement   The element that may carry a 'boost' attribute
	 * @param  query          The Query to boost
	 * @return                The same Query instance that was passed in (unchanged if either argument is null
	 *      or no boost is specified)
	 * @exception  Exception  If the boost value can not be parsed as a float
	 */
	private static Query applyBoost(Element queryElement, Query query) throws Exception {
		if (query == null || queryElement == null)
			return query;

		String boostText = queryElement.valueOf("@boost");

		// No boost requested: leave the query's boost as it is
		if (boostText == null || boostText.length() == 0)
			return query;

		float factor;
		try {
			factor = Float.parseFloat(boostText);
		} catch (NumberFormatException nfe) {
			String path = queryElement.attribute("boost").getUniquePath();
			throw new Exception("Error parsing document: boost value was not valid (" +
				nfe.getMessage() + "). Value must be a number, for example 1.5, 2.4. Error found at " + path);
		}

		query.setBoost(factor);
		return query;
	}


	//================================================================

	/**
	 *  Returns a String for the current date and time, suitable for display in log files and output to
	 *  standard out.
	 *
	 * @return    The current date and time, formatted as "MMM d, yyyy h:mm:ss a zzz"
	 */
	private static String getDateStamp() {
		// A new formatter is created on every call rather than shared
		SimpleDateFormat stampFormat = new SimpleDateFormat("MMM d, yyyy h:mm:ss a zzz");
		Date now = new Date();
		return stampFormat.format(now);
	}


	/**
	 *  Writes a line of text to standard error, prefixed with a datestamp and an error label.
	 *
	 * @param  s  The text that will be written to standard error.
	 */
	private final void prtlnErr(String s) {
		String line = getDateStamp() + " XMLQueryParser Error: " + s;
		System.err.println(line);
	}


	/**
	 *  Writes a line of text to standard out, prefixed with a datestamp, but only when debug is true.
	 *
	 * @param  s  The String that will be output.
	 */
	private final void prtln(String s) {
		// Debug output is switched off: nothing to do
		if (!debug)
			return;

		String line = getDateStamp() + " XMLQueryParser: " + s;
		System.out.println(line);
	}


	/**
	 *  Sets the debug attribute of this XMLQueryParser, which controls whether prtln produces output.
	 *
	 * @param  db  The new debug value
	 */
	private final void setDebug(boolean db) {
		this.debug = db;
	}
}