/*******************************************************************************
 * Copyright (c) 2011 Dipanjan Das 
 * Language Technologies Institute, 
 * Carnegie Mellon University, 
 * All Rights Reserved.
 * 
 * WordNetRelations.java is part of SEMAFOR 2.0.
 * 
 * SEMAFOR 2.0 is free software: you can redistribute it and/or modify  it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or 
 * (at your option) any later version.
 * 
 * SEMAFOR 2.0 is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details. 
 * 
 * You should have received a copy of the GNU General Public License along
 * with SEMAFOR 2.0.  If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package edu.cmu.cs.lti.ark.fn.wordnet;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import edu.cmu.cs.lti.ark.util.SerializedObjects;
import edu.cmu.cs.lti.ark.fn.wordnet.WordNetAPI.RelationType;

import net.didion.jwnl.data.POS;


import gnu.trove.THashMap;
import gnu.trove.THashSet;

public class WordNetRelations
{
	public static final String NO_RELATION = "no-relation";
	private static Pattern puncPattern = Pattern.compile("\\p{Punct}");
	private static final int NUM_THRESH = 4;
	
	private String sourceWord = null;
	private String targetWord = null;
	
	private WordNetAPI mWN = null;
	
	//contains all the relations for a word
	private Map<String, THashMap<String, Set<String>>> wordNetMap = new THashMap<String, THashMap<String, Set<String>>>(1000);
	
	public Map<String, THashMap<String, Set<String>>> getWordNetMap() {
		return wordNetMap;
	}

	public void setWordNetMap(Map<String, THashMap<String, Set<String>>> wordNetMap) {
		this.wordNetMap = wordNetMap;
	}

	//for one word, contains the list of ALL related words
	private Map<String, Set<String>> relatedWordsForWord = new THashMap<String,Set<String>>();
	
	public Map<String, Set<String>> getRelatedWordsForWord() {
		return relatedWordsForWord;
	}

	public void setRelatedWordsForWord(Map<String, Set<String>> relatedWordsForWord) {
		this.relatedWordsForWord = relatedWordsForWord;
	}

	//mapping a pair of words to a set of relations
	private Map<String, Set<String>> wordPairMap = new THashMap<String, Set<String>>(); 
		
	private Set<String> stopwords = null;
	
	public THashMap<String, Set<String>> workingRelationSet = null;
	
	public Set<String> workingRelatedWords = null;
	
	public Set<String> workingLSRelations = null;
		
	//mapping a word to its lemma 
	public Map<String,String> wordLemmaMap = new THashMap<String, String>();
	
	public WordNetRelations(String stopWordFile, String configFile)
	{
		initializeStopWords(stopWordFile);
		initializeWordNet(configFile);
	}
	
	public WordNetRelations(String serializedFile)
	{
		WordnetCache wc = (WordnetCache)SerializedObjects.readSerializedObject(serializedFile);
		relatedWordsForWord = wc.getRelatedWordsForWordMap();
		wordNetMap = wc.getWordnetMap();
		wordPairMap = wc.getWordPairMap();
		wordLemmaMap = wc.getWordLemmaMap();
	}
	
	public void setCache(String serializedFile)
	{
		WordnetCache wc = (WordnetCache)SerializedObjects.readSerializedObject(serializedFile);
		relatedWordsForWord = wc.getRelatedWordsForWordMap();
		wordNetMap = wc.getWordnetMap();
		wordPairMap = wc.getWordPairMap();
		wordLemmaMap = wc.getWordLemmaMap();
	}
	
	
	public void clearWordNetCache()
	{
		relatedWordsForWord.clear();
		wordNetMap.clear();
		wordPairMap.clear();
		wordLemmaMap.clear();
		mWN.nullInstance();
		sourceWord=null;
		targetWord=null;
	}
	
	public String getLemmaForWord(String word, String pos)
	{
		if(wordLemmaMap.containsKey(word+"_"+pos))
			return wordLemmaMap.get(word+"_"+pos);
		POS wnPOS=null;
		if(pos.startsWith("V"))
		{
			wnPOS = POS.VERB;
		}
		else if(pos.startsWith("J"))
		{
			wnPOS = POS.ADJECTIVE;
		}
		else if(pos.startsWith("R"))
		{
			wnPOS = POS.ADVERB;
		}
		else
			wnPOS = POS.NOUN;
		if(word.equals("'ve"))
			word="have";
		else if(word.equals("n't"))
			word="not";
		else if(word.equals("'s")&&pos.startsWith("V"))
			word="is";
		else if(word.equals("'ll"))
			word="will";
		else if(word.equals("'re"))
			word="are";
		String lemma=getLemma(word, wnPOS);	
		wordLemmaMap.put(word+"_"+pos, lemma);
		return lemma;
	}	
	
	public void writeWordNetCache(String serializedFile)
	{
		WordnetCache wc = new WordnetCache();
		wc.setRelatedWordsForWordMap(relatedWordsForWord);
		wc.setWordnetMap(wordNetMap);
		wc.setWordPairMap(wordPairMap);
		wc.setWordLemmaMap(wordLemmaMap);
		
		SerializedObjects.writeSerializedObject(wc, serializedFile);
	}	
	
	private void initializeStopWords(String stopFile)
	{
		stopwords = new THashSet<String>();
		try {
			BufferedReader bReader = new BufferedReader(new FileReader(stopFile));
			String line = null;
			while((line=bReader.readLine())!=null)
			{
				stopwords.add(line.trim());
			}
		} catch (Exception e) {
			System.err.println("Problem initializing stopword file");
			e.printStackTrace();
		}
	}	
	
	public THashMap<String, Set<String>> getAllRelationsMap(String sWord)
	{
		/*
		 * when sWord = sourceWord
		 */
		if(sWord.equals(sourceWord))
		{
			return workingRelationSet;
		}
		
		sourceWord = sWord;
		
		/*
		 * when sourceWord is contained in the memory 
		 */
		if(relatedWordsForWord.containsKey(sourceWord))
		{
			workingRelationSet = wordNetMap.get(sourceWord);
			workingRelatedWords = relatedWordsForWord.get(sourceWord);
		}
		/*
		 * when sourceWord is not contained in memory
		 */
		else
		{
			updateMapWithNewSourceWord();
		}
		targetWord=null;
		workingLSRelations=null;
		
		return workingRelationSet;
	}
	
	public Set<String> getAllRelatedWords(String sWord)
	{
		/*
		 * when sWord = sourceWord
		 */
		if(sWord.equals(sourceWord))
		{
			return workingRelatedWords;
		}
		
		sourceWord = sWord;
		
		/*
		 * when sourceWord is contained in the memory 
		 */
		if(relatedWordsForWord.containsKey(sourceWord))
		{
			workingRelationSet = wordNetMap.get(sourceWord);
			workingRelatedWords = relatedWordsForWord.get(sourceWord);
		}
		/*
		 * when sourceWord is not contained in memory
		 */
		else
		{
			updateMapWithNewSourceWord();
		}
		targetWord=null;
		workingLSRelations=null;
		
		return workingRelatedWords;
	}
	
	public void updateMapWithNewSourceWord()
	{
		Map<RelationType, Set<String>> rel = null;
		Set<String> relatedWords = null;
		//if punctuation
		if(stopwords.contains(sourceWord.toLowerCase()) || puncPattern.matcher(sourceWord.toLowerCase()).matches())
		{
			rel  = mWN.fillStopWord(sourceWord);
			relatedWords = mWN.getRelatedWord();
		}
		else if(isMoreThanThresh())
		{
			rel  = mWN.fillStopWord(sourceWord);
			relatedWords = mWN.getRelatedWord();
		}
		else
		{
			rel = mWN.getAllRelatedWords(sourceWord);
			relatedWords = mWN.getRelatedWord();
		}
		workingRelationSet = collapseFinerRelations(rel);
		workingRelatedWords = refineRelatedWords(relatedWords);
		wordNetMap.put(sourceWord,workingRelationSet);
		relatedWordsForWord.put(sourceWord,workingRelatedWords);
	}
	
	public boolean isMoreThanThresh()
	{
		String[] arr = sourceWord.trim().split(" ");
		if(arr.length>NUM_THRESH)
			return true;
		else
			return false;
	}
	
	public Set<String> getRelations(String sWord, String tWord)
	{
		
		/*
		 * when sWord = sourceWord and tWord = targetWord
		 */
		if(sWord.equals(sourceWord)&&tWord.equals(targetWord))
		{
			return workingLSRelations;
		}
		
		/*
		 * when the pair is contained in the map
		 * it is assumed that the source word's whole wordnet map is present in the memory
		 */
		String pair = sWord+"-"+tWord;
		Set<String> relations = wordPairMap.get(pair);
		if(relations!=null)
		{
			sourceWord = new String(sWord);
			targetWord = new String(tWord);
			workingRelationSet = wordNetMap.get(sourceWord);
			workingRelatedWords = relatedWordsForWord.get(sourceWord);
			workingLSRelations = relations;
			return relations;
		}
				
		/*
		 * when sWord is the present sourceWord, workingRelatedWords & wordkingRelationSet need not be updated
		 */
		targetWord = new String(tWord);
		if(sWord.equals(sourceWord))
		{
			Set<String> pairRelations = getRelationWN();
			workingLSRelations = pairRelations;
			wordPairMap.put(pair, pairRelations);
			return pairRelations;
		}	
		
		
		sourceWord=new String(sWord);
		/*
		 * when sourceWord is contained in the memory; workingLSRelations, workingRelatedWords & workingRelationSet
		 * have to be updated
		 */
		if(relatedWordsForWord.containsKey(sourceWord))
		{
			workingRelationSet = wordNetMap.get(sourceWord);
			workingRelatedWords = relatedWordsForWord.get(sourceWord);
		}
		/*
		 * when sourceWord is not contained in the memory; workingLSRelations, workingRelatedWords & workingRelationSet
		 * have to be updated
		 */
		else
		{
			updateMapWithNewSourceWord();
		}	
		THashSet<String> set = getRelationWN();
		workingLSRelations = set;
		wordPairMap.put(pair, set);
		
		return set;	
	}
	
	
	public THashSet<String> getAllPossibleRelationSubset(String sWord)
	{
		//putting stuff into the map
		getAllRelatedWords(sWord);
		
		THashSet<String> result =  new THashSet<String>();
		result.add(new String(NO_RELATION));
		
		/*
		 * workingRelatedWords contains all the related words
		 */
		Iterator<String> itr = workingRelatedWords.iterator();		
		while(itr.hasNext())
		{
			Set<String> relations = getRelations(sWord,itr.next());
			String[] array = new String[relations.size()];
			relations.toArray(array);
			Arrays.sort(array);
			String concat = "";
			for(String rel: array)
			{
				concat+=rel+":";
			}
			if(!result.contains(concat))
			{
				result.add(concat);
			}
		}
		
		return result;
	}
	
	public THashMap<Set<String>,Set<String>> getAllPossibleRelationSubset2(String sWord)
	{
		//putting stuff into the map
		getAllRelatedWords(sWord);
		
		THashMap<Set<String>,Set<String>> result =  new THashMap<Set<String>,Set<String>>();
		Set<String> set = new THashSet<String>();
		set.add(new String(NO_RELATION));
		result.put(set,null);
		
		/*
		 * workingRelatedWords contains all the related words
		 */
		Iterator<String> itr = workingRelatedWords.iterator();		
		while(itr.hasNext())
		{
			String itrWord = itr.next();
			Set<String> relations = getRelations(sWord,itrWord);
			if(!result.contains(relations))
			{
				Set<String> wordSet = new THashSet<String>();
				wordSet.add(itrWord);
				result.put(relations,wordSet);
			}
			else
			{
				Set<String> wordSet = result.get(relations);
				wordSet.add(itrWord);
			}
		}		
		return result;
	}
	
	private Set<String> refineRelatedWords(Set<String> relatedWords)
	{
		if(sourceWord==null)
		{
			System.out.println("Problem. Source Word Null. Exiting");
			System.exit(0);
		}
		if(sourceWord.charAt(0)>='0'&&sourceWord.charAt(0)<='9')
			relatedWords.add(sourceWord);
		return relatedWords;
	}
	
	private THashMap<String, Set<String>> collapseFinerRelations(Map<RelationType, Set<String>> rel)
	{
		THashMap<String,Set<String>> result = new THashMap<String,Set<String>>();
		THashSet<String> identity = new THashSet<String>();
		THashSet<String> synonym = new THashSet<String>();
		THashSet<String> antonym = new THashSet<String>();
		THashSet<String> hypernym = new THashSet<String>();
		THashSet<String> hyponym = new THashSet<String>();
		THashSet<String> derivedForm = new THashSet<String>();
		THashSet<String> morphSet = new THashSet<String>();
		THashSet<String> verbGroup = new THashSet<String>();
		THashSet<String> entailment = new THashSet<String>();
		THashSet<String> entailedBy = new THashSet<String>();
		THashSet<String> seeAlso = new THashSet<String>();
		THashSet<String> causalRelation = new THashSet<String>();
		THashSet<String> sameNumber = new THashSet<String>();
		
		identity.addAll(rel.get(RelationType.idty));
		synonym.addAll(rel.get(RelationType.synm)); synonym.addAll(rel.get(RelationType.syn2));
		antonym.addAll(rel.get(RelationType.antm)); antonym.addAll(rel.get(RelationType.extd)); antonym.addAll(rel.get(RelationType.indi));
		hypernym.addAll(rel.get(RelationType.hype));
		hyponym.addAll(rel.get(RelationType.hypo));
		derivedForm.addAll(rel.get(RelationType.derv));
		morphSet.addAll(rel.get(RelationType.morph));
		verbGroup.addAll(rel.get(RelationType.vgrp));
		entailment.addAll(rel.get(RelationType.entl));
		entailedBy.addAll(rel.get(RelationType.entlby));
		seeAlso.addAll(rel.get(RelationType.alsoc));
		causalRelation.addAll(rel.get(RelationType.cause));
		if(sourceWord==null)
		{
			System.out.println("Problem. Source Word Null. Exiting");
			System.exit(0);
		}
		if(sourceWord.charAt(0)>='0'&&sourceWord.charAt(0)<='9')
			sameNumber.add(sourceWord);
		
		
		result.put("identity",identity);
		result.put("synonym",synonym);
		result.put("antonym",antonym);
		result.put("hypernym",hypernym);
		result.put("hyponym",hyponym);
		result.put("derived-form",derivedForm);
		result.put("morph",morphSet);
		result.put("verb-group",verbGroup);
		result.put("entailment",entailment);
		result.put("entailed-by",entailedBy);
		result.put("see-also",seeAlso);
		result.put("causal-relation",causalRelation);
		result.put("same-number", sameNumber);
		
		return result;
	}
	
	
	private THashSet<String> getRelationWN()
	{
		THashSet<String> result = new THashSet<String>();
		if(!workingRelatedWords.contains(targetWord))
		{
			result.add(NO_RELATION);
			return result;
		}
		Set<String> keys  = workingRelationSet.keySet();
		Iterator<String> keyIterator = keys.iterator();
		while(keyIterator.hasNext())
		{
			String key = keyIterator.next();
			Set<String> words = workingRelationSet.get(key);
			if(words.contains(targetWord))
				result.add(key);
		}			
		
		return result;
	}
	
	public static String[] getLexSemRelationList()
	{
		ArrayList<String> list = new ArrayList<String>();
		list.add("identity");
		list.add("synonym");
		list.add("antonym");
		list.add("hypernym");
		list.add("hyponym");
		list.add("derived-form");
		list.add("morph");
		list.add("verb-group");
		list.add("entailment");
		list.add("entailed-by");
		list.add("see-also");
		list.add("causal-relation");
		list.add("same-number");
		list.add(NO_RELATION);	
		
		String[] rels = new String[list.size()];
		return list.toArray(rels);
	}
	
	
	
	private void initializeWordNet(String configFile)
	{
		try {
			mWN = WordNetAPI.getInstance(configFile);
		} catch (Exception e) {
			System.out.println("Could not initialize wordnet. Exiting.");
			e.printStackTrace();
			System.exit(0);
		}	
	}
	
	public String getLemma(String word, POS pos)
	{
		return WordNetAPI.getLemma(word, pos);
	}
	
}