package info.ephyra.questionanalysis;

import info.ephyra.nlp.indices.FunctionWords;
import info.ephyra.nlp.semantics.ontologies.WordNet;
import info.ephyra.util.Dictionary;
import info.ephyra.util.FileUtils;
import info.ephyra.util.HashDictionary;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.regex.PatternSyntaxException;

import javatools.PlingStemmer;

/**
 * The <code>QuestionInterpreter</code> parses a question and determines the
 * TARGET, the CONTEXT and the PROPERTY it asks for.
 * 
 * @author Nico Schlaefer
 * @version 2005-11-09
 */
public class QuestionInterpreter {
	/** The patterns that are applied to a question. */
	private static ArrayList<QuestionPattern> questionPatterns =
		new ArrayList<QuestionPattern>();
	/** For each PROPERTY a dictionary of keywords. */
	private static Hashtable<String, HashDictionary> keywords =
		new Hashtable<String, HashDictionary>();
	/** For each PROPERTY a template for a question asking for it. */
	private static Hashtable<String, String> questionTemplates =
		new Hashtable<String, String>();
	/** For each PROPERTY a template for an answer string. */
	private static Hashtable<String, String> answerTemplates =
		new Hashtable<String, String>();
	 * Adds the keywords in a descriptor of a question pattern to the dictionary
	 * for the respective PROPERTY.
	 * @param expr pattern descriptor 
	 * @param prop PROPERTY the question pattern belongs to
	private static void addKeywords(String expr, String prop) {
		// tokenize expr, delimiters are meta-characters, '<', '>' and blank
		StringTokenizer st = new StringTokenizer(expr, "\\|*+?.^$(){}[]<> ");
		String token;
		HashDictionary dict;
		while (st.hasMoreTokens()) {
			token = st.nextToken();
			if (token.length() > 2 && !FunctionWords.lookup(token)) {
				// token has a length of at least 3 and is not a function word
				dict = keywords.get(prop);
				if (dict == null) {  // new dictionary
					dict = new HashDictionary();
					keywords.put(prop, dict);
				dict.add(token);  // add token to the dictionary
	 * Loads the question patterns from a directory of PROPERTY files. Each file
	 * contains a list of pattern descriptors. Their format is described in the
	 * documentation of the class <code>QuestionPattern</code>.
	 * @param dir directory of the question patterns
	 * @return true, iff the question patterns were loaded successfully
	public static boolean loadPatterns(String dir) {
		File[] files = FileUtils.getFiles(dir);
		try {
			BufferedReader in;
			String prop, line;
			for (File file : files) {
				prop = file.getName();
				in = new BufferedReader(new FileReader(file));
				while (in.ready()) {
					line = in.readLine().trim();
					if (line.length() == 0 || line.startsWith("//"))
						continue;  // skip blank lines and comments
					if (line.startsWith("QUESTION_TEMPLATE")) {
						// add question template
						String[] tokens = line.split("\\s+", 2);
						if (tokens.length > 1)
							questionTemplates.put(prop, tokens[1]);
					} else if (line.startsWith("ANSWER_TEMPLATE")) {
						// add answer template
						String[] tokens = line.split("\\s+", 2);
						if (tokens.length > 1)
							answerTemplates.put(prop, tokens[1]);
					} else {
						try {
							// add question pattern
							questionPatterns.add(new QuestionPattern(line, prop));
							// add keywords to the dictionary for prop
							addKeywords(line, prop);
						} catch (PatternSyntaxException pse) {
							MsgPrinter.printErrorMsg("Problem loading pattern:\n" +
													 prop + " " + line);
		} catch (IOException e) {
			return false;
		return true;
	 * Interprets a question by applying the question patterns and returns the
	 * interpretations of minimal length.
	 * @param qn normalized question string
	 * @param stemmed stemmed question string
	 * @return array of interpretations or an empty array, if there was no
	 * 		   matching question pattern
	public static QuestionInterpretation[] interpret(String qn,
													 String stemmed) {
		ArrayList<QuestionInterpretation> qis =
			new ArrayList<QuestionInterpretation>();
		// apply the question patterns
		for (QuestionPattern questionPattern : questionPatterns) {
			QuestionInterpretation qi =
				questionPattern.apply(qn, stemmed);
			if (qi != null) qis.add(qi);
		// sort the interpretations by their length
		QuestionInterpretation[] sorted =
			qis.toArray(new QuestionInterpretation[qis.size()]);
		// only return interpretations of minimal length
		ArrayList<QuestionInterpretation> minLength =
			new ArrayList<QuestionInterpretation>();
		for (QuestionInterpretation qi : sorted)
			if (qi.getLength() == sorted[0].getLength()) minLength.add(qi);
		return minLength.toArray(new QuestionInterpretation[minLength.size()]);
	 * Looks up a word in the dictionary for the given PROPERTY.
	 * @param word the word to be looked up
	 * @param prop the PROPERTY
	 * @return true, iff <code>word</code> is in the dictionary for
	 * 		   <code>prop</code>
	public static boolean lookupKeyword(String word, String prop) {
		Dictionary dict = keywords.get(prop);
		if (dict == null) return false;
		if (dict.contains(word)) return true;
		String stem = PlingStemmer.stem(word);
		if (dict.contains(stem)) return true;
		String lemma = WordNet.getLemma(word, WordNet.VERB);
		if (lemma != null && dict.contains(lemma)) return true;
		return false;
	 * Returns a question string that asks for the specified property of the
	 * target object or <code>null</code> if no question template is available
	 * for the property.
	 * @param to target object
	 * @param prop property
	 * @return question string or <code>null</code>
	public static String getQuestion(String to, String prop) {
		String question = questionTemplates.get(prop);
		if (question == null) return null;
		return question.replace("<TO>", to);
	 * Returns an answer string that expresses that the property object is an
	 * instance of the specified property or <code>null</code> if no answer
	 * template is available for the property.
	 * @param po property object
	 * @param prop property
	 * @return answer string or <code>null</code>
	public static String getAnswer(String po, String prop) {
		String answer = answerTemplates.get(prop);
		if (answer == null) return null;
		return answer.replace("<PO>", po);