// Java source code of MinIE

package de.uni_mannheim.minie;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import de.uni_mannheim.clausie.ClausIE;
import de.uni_mannheim.clausie.clause.Clause;
import de.uni_mannheim.clausie.phrase.Phrase;
import de.uni_mannheim.clausie.proposition.Proposition;
import de.uni_mannheim.constant.NE_TYPE;
import de.uni_mannheim.constant.POS_TAG;
import de.uni_mannheim.constant.REGEX;
import de.uni_mannheim.constant.SEPARATOR;
import de.uni_mannheim.minie.annotation.AnnotatedPhrase;
import de.uni_mannheim.minie.annotation.AnnotatedProposition;
import de.uni_mannheim.minie.annotation.Attribution;
import de.uni_mannheim.minie.annotation.Modality;
import de.uni_mannheim.minie.annotation.Polarity;
import de.uni_mannheim.minie.minimize.object.ObjAggressiveMinimization;
import de.uni_mannheim.minie.minimize.object.ObjDictionaryMinimization;
import de.uni_mannheim.minie.minimize.object.ObjSafeMinimization;
import de.uni_mannheim.minie.minimize.relation.RelAggressiveMinimization;
import de.uni_mannheim.minie.minimize.relation.RelDictionaryMinimization;
import de.uni_mannheim.minie.minimize.relation.RelSafeMinimization;
import de.uni_mannheim.minie.minimize.subject.SubjAggressiveMinimization;
import de.uni_mannheim.minie.minimize.subject.SubjDictionaryMinimization;
import de.uni_mannheim.minie.minimize.subject.SubjSafeMinimization;
import de.uni_mannheim.minie.proposition.ImplicitExtractions;

import de.uni_mannheim.utils.phrase.PhraseUtils;
import de.uni_mannheim.utils.Dictionary;
import de.uni_mannheim.utils.coreNLP.CoreNLPUtils;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.trees.EnglishGrammaticalRelations;
import edu.stanford.nlp.util.CoreMap;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;

/**
 * @author Kiril Gashteovski
 */
public class MinIE {
    /** List of annotated propositions (the extractions held by this object) **/
    private ObjectArrayList<AnnotatedProposition> propositions;

    /** The semantic graph of the whole sentence **/
    private SemanticGraph sentenceSemGraph;
    
    /** The whole sentence as a list of indexed words **/
    private ObjectArrayList<IndexedWord> sentence;

    /** 
     * The whole original sentence as a list of indexed words.
     * NOTE: where this file assigns it, it is set to the very same list object as 'sentence' (no copy is made).
     **/
    private ObjectArrayList<IndexedWord> originalSentence;
    
    /** 
     * Reusability variable: string forms of the propositions that were generated together with an attribution.
     * Used to avoid adding the same content again without its attribution.
     **/
    private ObjectOpenHashSet<String> propsWithAttribution;
    
    /**
     * Constructor wrapping an already available list of annotated propositions.
     * The remaining state (semantic graph, sentence words, attribution bookkeeping) is initialized empty,
     * the same way the default constructor does it, so that no field is left null.
     * 
     * @param props - list of annotated propositions (stored by reference, not copied)
     */
    public MinIE(ObjectArrayList<AnnotatedProposition> props){
        this.propositions = props;
        this.sentenceSemGraph = new SemanticGraph();
        this.sentence = new ObjectArrayList<>();
        this.propsWithAttribution = new ObjectOpenHashSet<>();
    }
    
    /** MinIE mode: selects which minimization (if any) minimize(...) applies to the extractions **/
    public enum Mode {
        // minimize(...) calls minimizeAggressiveMode()
        AGGRESSIVE,
        // minimize(...) calls minimizeDictionaryMode(...) with the words of the supplied dictionary (MinIE-D)
        DICTIONARY,
        // minimize(...) calls minimizeSafeMode()
        SAFE,
        // no minimization step is run for this mode
        COMPLETE
    }
    
    /** Default constructor: no propositions, a fresh semantic graph, no sentence words, no attribution bookkeeping **/
    public MinIE(){
        propositions = new ObjectArrayList<>();
        sentenceSemGraph = new SemanticGraph();
        sentence = new ObjectArrayList<>();
        propsWithAttribution = new ObjectOpenHashSet<>();
    }
    
    /**
     * Initializes an empty MinIE object and immediately runs the extraction and minimization on the sentence.
     * 
     * @param sentence - input sentence
     * @param parser - dependency parse pipeline of the sentence
     * @param mode - the minimization mode
     * @param d - dictionary of multi-word expressions (for MinIE-D)
     */
    public MinIE(String sentence, StanfordCoreNLP parser, Mode mode, Dictionary d) {
        // Same empty initial state as the default constructor
        this();
        
        minimize(sentence, parser, mode, d);
    }
    
    /**
     * Initializes an empty MinIE object and immediately runs the extraction and minimization on the sentence,
     * using a newly created (empty) dictionary.
     * 
     * @param sentence - input sentence
     * @param parser - dependency parse pipeline of the sentence
     * @param mode - the minimization mode
     * 
     * NOTE: If mode is MinIE-D, then this will proceed as MinIE-D but with empty dictionary 
     * (i.e. will drop every word that is a candidate)
     */
    public MinIE(String sentence, StanfordCoreNLP parser, Mode mode) {
        // Same empty initial state as the default constructor
        this();
        
        minimize(sentence, parser, mode, new Dictionary());
    }
    
    /**
     * Initializes an empty MinIE object and immediately runs the extraction and minimization on an
     * already parsed sentence, using a newly created (empty) dictionary.
     * 
     * @param sentence - input sentence
     * @param sg - dependency parse graph of the sentence
     * @param mode - the minimization mode
     * 
     * NOTE: If mode is MinIE-D, then this will proceed as MinIE-D but with empty dictionary 
     * (i.e. will drop every word that is a candidate)
     */
    public MinIE(String sentence, SemanticGraph sg, Mode mode) {
        // Same empty initial state as the default constructor
        this();
        
        minimize(sentence, sg, mode, new Dictionary());
    }

    /**
     * Initializes an empty MinIE object and immediately runs the extraction and minimization on an
     * already parsed sentence.
     * 
     * @param sentence - input sentence
     * @param sg - dependency parse graph of the sentence
     * @param mode - the minimization mode
     * @param dict - dictionary of multi-word expressions (for MinIE-D)
     */
    public MinIE(String sentence, SemanticGraph sg, Mode mode, Dictionary dict) {
        // Same empty initial state as the default constructor
        this();
        
        minimize(sentence, sg, mode, dict);
    }
    
    /** 
     * Given an input sentence, parser, mode and a dictionary, make extractions and then minimize them accordingly.
     * The parsing occurs INSIDE this function; everything after the parsing is shared with the overload that 
     * takes an already parsed sentence, so the two entry points cannot drift apart.
     * 
     * @param sentence - input sentence
     * @param parser - dependency parse pipeline for the sentence
     * @param mode - minimization mode
     * @param d - dictionary (for MinIE-D)
     */
    public void minimize(String sentence, StanfordCoreNLP parser, Mode mode, Dictionary d) {
        // Parse here, then run the common ClausIE + annotation + minimization pipeline
        SemanticGraph parsedSentence = CoreNLPUtils.parse(parser, sentence);
        this.minimize(sentence, parsedSentence, mode, d);
    }
    
    /** 
     * Runs ClausIE on an already parsed sentence, annotates the resulting propositions and minimizes them 
     * according to the requested mode. The parsing occurs OUTSIDE this function.
     * 
     * @param sentence - input sentence
     * @param sg - semantic graph object (dependency parse of the sentence)
     * @param mode - minimization mode
     * @param d - dictionary (for MinIE-D)
     */
    public void minimize(String sentence, SemanticGraph sg, Mode mode, Dictionary d) {
        // Step 1: clause detection and proposition generation with ClausIE
        ClausIE extractor = new ClausIE();
        extractor.setSemanticGraph(sg);
        extractor.detectClauses();
        extractor.generatePropositions(extractor.getSemanticGraph());
        
        // Step 2: take over the graph and the propositions, then annotate polarity and modality
        this.setSemanticGraph(extractor.getSemanticGraph());
        this.setPropositions(extractor);
        this.setPolarity();
        this.setModality();
        
        // Step 3: mode-specific minimization (nothing is done for COMPLETE)
        if (mode == Mode.AGGRESSIVE) {
            this.minimizeAggressiveMode();
        } else if (mode == Mode.DICTIONARY) {
            this.minimizeDictionaryMode(d.words());
        } else if (mode == Mode.SAFE) {
            this.minimizeSafeMode();
        }
        
        // Step 4: drop duplicate extractions
        this.removeDuplicates();
    }
    
    /** 
     * Clear the variables: empties the propositions, the sentence words and the attribution bookkeeping, 
     * and drops the reference to the sentence semantic graph.
     * Collections that are currently null (e.g. after a setter was called with null) are skipped 
     * instead of causing a NullPointerException.
     **/
    public void clear(){
        if (this.propositions != null) {
            this.propositions.clear();
        }
        this.sentenceSemGraph = null;
        if (this.sentence != null) {
            this.sentence.clear();
        }
        if (this.propsWithAttribution != null) {
            this.propsWithAttribution.clear();
        }
    }
    
    /**
     * Getter for the propositions
     * @return the list of annotated propositions held by this object (the list itself, not a copy)
     */
    public ObjectArrayList<AnnotatedProposition> getPropositions(){
        return propositions;
    }
    /** @return the i-th annotated proposition of the list */
    public AnnotatedProposition getProposition(int i){
        return propositions.get(i);
    }
    /** @return the semantic graph of the whole sentence */
    public SemanticGraph getSentenceSemanticGraph(){
        return sentenceSemGraph;
    }
    /** @return the number of constituents in the triple of the i-th proposition */
    public int getPropositionSize(int i){
        AnnotatedProposition target = propositions.get(i);
        return target.getTriple().size();
    }
    /** @return the whole sentence as a list of indexed words (the list itself, not a copy) */
    public ObjectArrayList<IndexedWord> getSentenceWords() {
        return sentence;
    }
    /** @return the whole original sentence as a list of indexed words (the list itself, not a copy) */
    public ObjectArrayList<IndexedWord> getOriginalSentence() {
        return originalSentence;
    }
    
    /**
     * Checks whether the i-th proposition is annotated as a possibility.
     * (Previously this method called itself unconditionally and could only end in a StackOverflowError.)
     * 
     * NOTE(review): relies on AnnotatedProposition.getModality() and Modality.getModalityType(), which are 
     * declared outside this file - confirm the accessor names.
     * 
     * @param i - index of the proposition in the list of propositions
     * @return true if the modality type of the i-th proposition is POSSIBILITY
     */
    public boolean isPossibility(int i){
        return this.propositions.get(i).getModality().getModalityType() == Modality.Type.POSSIBILITY;
    }
    /**
     * Checks whether the i-th proposition is annotated as a certainty.
     * (Previously this method called itself unconditionally and could only end in a StackOverflowError.)
     * 
     * NOTE(review): relies on AnnotatedProposition.getModality() and Modality.getModalityType(), which are 
     * declared outside this file - confirm the accessor names.
     * 
     * @param i - index of the proposition in the list of propositions
     * @return true if the modality type of the i-th proposition is CERTAINTY
     */
    public boolean isCertainty(int i){
        return this.propositions.get(i).getModality().getModalityType() == Modality.Type.CERTAINTY;
    }
    
    /**
     * Other getters
     */
    /** @return the subject of the i-th proposition */
    public AnnotatedPhrase getSubject(int i){
        AnnotatedProposition target = propositions.get(i);
        return target.getSubject();
    }
    /** @return the relation of the i-th proposition */
    public AnnotatedPhrase getRelation(int i){
        AnnotatedProposition target = propositions.get(i);
        return target.getRelation();
    }
    /** 
     * Assuming that the proposition is a triple, it will return the third constituent in the list 
     * (as provided by getObject() of the i-th proposition). 
     */
    public AnnotatedPhrase getObject(int i){
        AnnotatedProposition target = propositions.get(i);
        return target.getObject();
    }
    
    /**
     * Setters
     */
    /** Replaces the list of propositions with the given list (stored by reference, not copied) */
    public void setPropositions(ObjectArrayList<AnnotatedProposition> props){
        propositions = props;
    }
    /** Replaces the i-th proposition of the list with the given one */
    public void setProposition(int i, AnnotatedProposition prop){
        propositions.set(i, prop);
    }
    /** Sets the subject of the i-th proposition */
    public void setSubject(int i, AnnotatedPhrase subj){
        AnnotatedProposition target = propositions.get(i);
        target.setSubject(subj);
    }
    /** Sets the relation of the i-th proposition */
    public void setRelation(int i, AnnotatedPhrase rel){
        AnnotatedProposition target = propositions.get(i);
        target.setRelation(rel);
    }
    /** Sets the object of the i-th proposition */
    public void setObject(int i, AnnotatedPhrase obj){
        AnnotatedProposition target = propositions.get(i);
        target.setObject(obj);
    }
    /** Sets the attribution of the i-th proposition */
    public void setAttribution(int i, Attribution s){
        AnnotatedProposition target = propositions.get(i);
        target.setAttribution(s);
    }
    /** Replaces the list of sentence words with the given list (stored by reference, not copied) */
    public void setSentenceWords(ObjectArrayList<IndexedWord> s) {
        sentence = s;
    }
    
    /** Reset the attribution of the i-th proposition by replacing it with a newly constructed Attribution **/
    public void resetAttribution(int i){
        AnnotatedProposition target = propositions.get(i);
        target.setAttribution(new Attribution());
    }
    
    /**
     * Given a proposition, detect the attribution. Whenever one is found, new propositions carrying that 
     * attribution are generated through generatePropositionsWithAttribution(...).
     * Two kinds of attributions are looked for:
     *  * objects starting with "according to ..." 
     *  * relations whose head verb is listed in Modality.VERB_CERTAINTY or Modality.VERB_POSSIBILITY and 
     *    whose object contains a verb and the dependent of a subject/clausal complement relation
     * 
     * @param proposition - the proposition to inspect
     * @return true if an attribution was detected (also when no root word can be found for the relation), 
     *         false otherwise
     */
    public boolean detectAttribution(Proposition proposition){
        // Fewer than 3 constituents (e.g. an SV proposition): nothing to detect here
        if (proposition.getConstituents().size() < 3)
            return false;
        
        // Attribution flag is set to 'false' by default
        boolean attributionDetected = false;
        
        // ClausIE object reused for every extraction made below
        ClausIE clausieObj = new ClausIE();
        
        // Elements of the triple
        AnnotatedPhrase subject = new AnnotatedPhrase(proposition.subject());
        AnnotatedPhrase relation = new AnnotatedPhrase(proposition.relation());
        AnnotatedPhrase object = new AnnotatedPhrase(proposition.object());
        
        // Get the head word of the relation and if it's null, return 'true'
        relation.setRoot(CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, relation.getWordList()));
        IndexedWord relHead = relation.getRoot();
        if (relHead == null) 
            return true;
        
        // Detect "according to..." patterns by checking the adverbials (i.e. the objects)
        if (object.getWordList().size() > 2 
                && object.getWordList().get(0).word().toLowerCase().equals(Attribution.ACCORDING) 
                && object.getWordList().get(1).tag().equals(POS_TAG.TO)){
            // The attributed content is what the subject and the relation express
            ObjectArrayList<IndexedWord> subjAndRelWords = new ObjectArrayList<IndexedWord>();
            subjAndRelWords.addAll(subject.getWordList());
            subjAndRelWords.addAll(relation.getWordList());
            SemanticGraph newsg = CoreNLPUtils.getSubgraphFromWords(this.sentenceSemGraph, subjAndRelWords);
            
            // The attribution predicate "according to"
            StringBuilder predicate = new StringBuilder();
            predicate.append(Attribution.ACCORDING);
            predicate.append(SEPARATOR.SPACE);
            predicate.append(object.getWordList().get(1).word());
            
            // The attribution phrase is taken from the object words following "according to"
            this.generatePropositionsWithAttribution(clausieObj, newsg, new Attribution(
                                new AnnotatedPhrase(object.getWordSubList(2, object.getWordList().size()-1)), 
                                Polarity.Type.POSITIVE, 
                                Modality.Type.CERTAINTY, 
                                predicate.toString().trim()));
            attributionDetected = true;
        }
        
        // Modality and polarity of the attribution (detecting attribution with predicates)
        Polarity.Type pol = Polarity.Type.POSITIVE; // TODO: default value; this is temporary
        Modality.Type mod = null;
        String headLemma = relHead.lemma().toLowerCase();
        boolean certaintyVerb = Modality.VERB_CERTAINTY.contains(headLemma);
        if (certaintyVerb || Modality.VERB_POSSIBILITY.contains(headLemma)){
            // A certainty verb yields CERTAINTY unless proven otherwise below
            mod = certaintyVerb ? Modality.Type.CERTAINTY : Modality.Type.POSSIBILITY;
            
            // If the head verb of the relation is negated, set polarity to NEGATIVE
            if (sentenceSemGraph.getChildWithReln(relHead, EnglishGrammaticalRelations.NEGATION_MODIFIER) != null){
                pol = Polarity.Type.NEGATIVE;
            }
            
            // If there is a modal verb as a modifier of the head verb, make it a possibility modality type
            Set<IndexedWord> auxs = sentenceSemGraph.getChildrenWithReln(relHead, EnglishGrammaticalRelations.AUX_MODIFIER);
            for (IndexedWord aux: auxs){
                if (aux.tag().equals(POS_TAG.MD)){
                    mod = Modality.Type.POSSIBILITY;
                }
            }
        }
        
        // If a predicate is found
        if (mod != null){
            // Stop searching if there's no verb in the object. An "according to" attribution detected above 
            // must still be reported, hence the flag is returned instead of a plain 'false'.
            if (!CoreNLPUtils.verbInList(object.getWordList())){
                return attributionDetected;
            }
            
            // Get the subject relationships (plus the clausal complements)
            List<SemanticGraphEdge> nsubjs = this.sentenceSemGraph.findAllRelns(EnglishGrammaticalRelations.NOMINAL_SUBJECT);
            nsubjs.addAll(this.sentenceSemGraph.findAllRelns(EnglishGrammaticalRelations.CLAUSAL_SUBJECT));
            nsubjs.addAll(this.sentenceSemGraph.findAllRelns(EnglishGrammaticalRelations.SUBJECT));
            nsubjs.addAll(this.sentenceSemGraph.findAllRelns(EnglishGrammaticalRelations.CLAUSAL_COMPLEMENT));
            
            // Iterate through the dependents; only those located inside the object are processed 
            // (the object is already known to contain a verb at this point)
            for (SemanticGraphEdge edge: nsubjs){
                IndexedWord child = edge.getDependent();
                if (object.getWordList().contains(child)){
                    SemanticGraph objSg = CoreNLPUtils.getSubgraphFromWords(this.sentenceSemGraph, object.getWordList());
                    this.generatePropositionsWithAttribution(clausieObj, objSg, 
                                                             new Attribution(subject, pol, mod, relHead.lemma()));
                    attributionDetected = true;
                }
            }
        }
        
        return attributionDetected;
    }
    
    /**
     * Re-runs ClausIE on the given semantic graph and adds every resulting (non-pruned) proposition, 
     * annotated with a copy of the given attribution, to the list of propositions. The string form of each 
     * added proposition is also remembered in the set of propositions having an attribution.
     * 
     * @param clausieObj: ClausIE object (reusable variable; it is cleared before use)
     * @param objSg: semantic graph to extract the new propositions from
     * @param s: the attribution
     */
    public void generatePropositionsWithAttribution(ClausIE clausieObj, SemanticGraph objSg, Attribution s){
        // Fresh ClausIE run on the supplied graph
        clausieObj.clear();
        clausieObj.setSemanticGraph(objSg);
        clausieObj.detectClauses();
        clausieObj.generatePropositions(clausieObj.getSemanticGraph());
        
        for (Clause clause: clausieObj.getClauses()){
            for (Proposition extracted: clause.getPropositions()){
                // Turn the constituents of the ClausIE proposition into annotated phrases
                ObjectArrayList<AnnotatedPhrase> phrases = new ObjectArrayList<AnnotatedPhrase>();
                for (int pos = 0; pos < extracted.getConstituents().size(); pos++){
                    AnnotatedPhrase annotated = new AnnotatedPhrase(extracted.getConstituents().get(pos));
                    annotated.detectQuantities(this.sentenceSemGraph, pos);
                    annotated.annotateQuantities(pos);
                    phrases.add(annotated);
                }
                
                // Only propositions that are not pruned are kept
                if (!this.pruneAnnotatedProposition(phrases)){
                    AnnotatedProposition attributed = new AnnotatedProposition(phrases, new Attribution(s));
                    this.pushWordsToRelation(attributed);
                    
                    this.propositions.add(attributed);
                    this.propsWithAttribution.add(PhraseUtils.listOfAnnotatedPhrasesToString(phrases));
                }
            }
        }
    }
    
    /** Applies pushWordsToRelation(...) to every proposition of the list, in list order */
    public void pushWordsToRelationsInPropositions() {
        for (AnnotatedProposition aProp: this.propositions) {
            this.pushWordsToRelation(aProp);
        }
    }
    
    /**
     * Given a ClausIE object, set the propositions from ClausIE to MinIE (don't annotate neg. and poss.)
     * While assigning the propositions, these are things that are done:
     *  * generate the implicit extractions of the sentence and add them first
     *  * detect attributions
     *  * push words to the relation (if possible)
     *  * remove propositions that duplicate the content of a proposition having an attribution
     * @param clausie: clausie object containing clause types, propositions, sentence dependency parse, ... 
     */
    public void setPropositions(ClausIE clausie){
        // Set of strings for propositions with attribution (filled while attributions are detected)
        this.propsWithAttribution = new ObjectOpenHashSet<String>(); 
        
        // Set the sentence, make the implicit extractions from it, and add them to the list of propositions
        this.sentence = new ObjectArrayList<IndexedWord> (clausie.getSemanticGraph().vertexListSorted());
        this.originalSentence = this.sentence;
        ImplicitExtractions extractions = new ImplicitExtractions(this.sentence, this.sentenceSemGraph);
        extractions.generateImplicitExtractions();
        int id = 0;
        for (AnnotatedProposition aProp: extractions.getImplicitExtractions()) {
            id++;
            aProp.setId(id);
            this.propositions.add(aProp);
        }
        
        // Set the propositions extracted from ClausIE to MinIE
        for (Clause clause: clausie.getClauses()){
            for (Proposition proposition: clause.getPropositions()){
                // The id advances for every ClausIE proposition, including the ones skipped below
                id++;
                
                // Don't add the proposition if an attribution is detected or its content has an attribution already 
                if (this.detectAttribution(proposition) 
                        || this.propsWithAttribution.contains(proposition.propositionToString()))
                    continue;
                
                // Add the proposition from ClausIE to the list of propositions of MinIE
                ObjectArrayList<AnnotatedPhrase> prop = new ObjectArrayList<AnnotatedPhrase>();
                for (int i = 0; i < proposition.getConstituents().size(); i++){
                    AnnotatedPhrase aPhrase = new AnnotatedPhrase(proposition.getConstituents().get(i));
                    aPhrase.detectQuantities(clausie.getSemanticGraph(), i);
                    aPhrase.annotateQuantities(i);
                    prop.add(aPhrase);
                }
                
                // Prune proposition if needed
                if (this.pruneAnnotatedProposition(prop)){
                    continue;
                }
                
                // Annotated proposition, with words pushed to the relation
                // TODO: handling of possessives (processPoss) is currently disabled; check this out
                AnnotatedProposition aProp = new AnnotatedProposition(prop, id);
                this.pushWordsToRelation(aProp);
                
                this.propositions.add(aProp);
            }
        }
        
        // Remove propositions whose content also exists with an attribution:
        //  * the ones having no attribution phrase at all
        //  * repeated occurrences of the same content + attribution (the first occurrence is kept)
        // TODO: temporary solution, make this in removeDuplicates()
        ObjectArrayList<AnnotatedProposition> delProps = new ObjectArrayList<AnnotatedProposition>();
        ObjectOpenHashSet<String> seenAttributedProps = new ObjectOpenHashSet<String>();
        for (AnnotatedProposition aProp: this.propositions){
            String thisProp = PhraseUtils.listOfAnnotatedPhrasesToString(aProp.getTriple());
            if (!this.propsWithAttribution.contains(thisProp)){
                continue;
            }
            
            if (aProp.getAttribution().getAttributionPhrase() == null){
                delProps.add(aProp);
            } else {
                // Key: content of the proposition followed by its attribution
                StringBuilder key = new StringBuilder();
                key.append(thisProp);
                key.append(SEPARATOR.SPACE);
                key.append(aProp.getAttribution().toString());
                
                // add() returns false when the key was seen before => duplicate
                if (!seenAttributedProps.add(key.toString())){
                    delProps.add(aProp);
                }
            }
        }
        this.propositions.removeAll(delProps);
    }
    
    /**
     * Given a ClausIE object, set the propositions from ClausIE to MinIE without attribution detection, 
     * quantity annotation or pruning. The implicit extractions of the sentence are added first, then every 
     * ClausIE proposition (with words pushed to the relation), and finally duplicates are removed.
     * 
     * @param clausie: clausie object providing the semantic graph, the clauses and their propositions
     */
    public void setPropositionsWithoutAnnotations(ClausIE clausie) {
        // Sentence words come from the sorted vertices of ClausIE's semantic graph
        this.sentence = new ObjectArrayList<IndexedWord> (clausie.getSemanticGraph().vertexListSorted());
        this.originalSentence = this.sentence;
        
        // Implicit extractions go first and receive the first ids
        ImplicitExtractions implicit = new ImplicitExtractions(this.sentence, this.sentenceSemGraph);
        implicit.generateImplicitExtractions();
        int nextId = 0;
        for (AnnotatedProposition implicitProp: implicit.getImplicitExtractions()) {
            nextId++;
            implicitProp.setId(nextId);
            this.propositions.add(implicitProp);
        }
        
        // Then the propositions extracted by ClausIE, one annotated phrase per constituent
        for (Clause clause: clausie.getClauses()){
            for (Proposition extracted: clause.getPropositions()){
                nextId++;
                
                ObjectArrayList<AnnotatedPhrase> phrases = new ObjectArrayList<AnnotatedPhrase>();
                for (int pos = 0; pos < extracted.getConstituents().size(); pos++){
                    phrases.add(new AnnotatedPhrase(extracted.getConstituents().get(pos)));
                }
                
                AnnotatedProposition annotated = new AnnotatedProposition(phrases, nextId);
                this.pushWordsToRelation(annotated);
                this.propositions.add(annotated);
            }
        }
        
        this.removeDuplicates();
    }
    
    /**
     * Process possessives in the object.
     * The object is matched against REGEX.T_NP_POS_NP; only a match spanning the whole object is considered. 
     * When such a match contains a possessive marker (POS tag, not part of a named entity), the marker is 
     * removed from the object, the object words that followed it are appended to the relation together with 
     * an artificial word "of", and those words are removed from the object.
     * NOTE(review): assumes removeWordFromList(int) removes the word at the given position - confirm.
     * 
     * @param prop: proposition (list of annotated phrases)
     */
    public void processPoss(ObjectArrayList<AnnotatedPhrase> prop){
        // If there's no object (clause type SV), return
        if (prop.size() < 3)
            return;
        
        AnnotatedPhrase object = prop.get(2);
        AnnotatedPhrase rel = prop.get(1);
        TokenSequencePattern possPattern = TokenSequencePattern.compile(REGEX.T_NP_POS_NP);
        TokenSequenceMatcher possMatcher = possPattern.getMatcher(object.getWordCoreLabelList());
        
        // Position of the possessive marker within the object (-1: none found)
        int markerPos = -1;
        
        while (possMatcher.find()){
            List<CoreMap> matched = possMatcher.groupNodes();
            
            // The match must start at the first and end at the last word of the object
            CoreLabel matchStart = new CoreLabel(matched.get(0));
            CoreLabel matchEnd = new CoreLabel(matched.get(matched.size() - 1));
            boolean spansWholeObject = 
                    matchStart.index() == object.getWordList().get(0).index() 
                    && matchEnd.index() == object.getWordList().get(object.getWordList().size() - 1).index();
            if (!spansWholeObject) 
                break;
            
            // First possessive marker of the match that is not part of a named entity
            for (CoreMap node: matched){
                CoreLabel label = new CoreLabel(node);
                if (label.tag().equals(POS_TAG.POS) && (label.ner().equals(NE_TYPE.NO_NER))){
                    markerPos = object.getWordCoreLabelList().indexOf(label);
                    break;
                }
            }
        }
        
        // Nothing to rewrite without a possessive marker
        if (markerPos <= -1)
            return;
        
        // Artificial word "of" which closes the extended relation
        IndexedWord of = new IndexedWord();
        of.setOriginalText("of");
        of.setLemma("of");
        of.setWord("of");
        of.setTag("IN");
        of.setNER("O");
        of.setIndex(-1);
        
        // Drop the marker, then move everything that followed it from the object to the relation
        ObjectArrayList<IndexedWord> movedWords = new ObjectArrayList<>();
        object.removeWordFromList(markerPos);
        for (int pos = markerPos; pos < object.getWordList().size(); pos++){
            movedWords.add(object.getWordList().get(pos));
        }
        rel.addWordsToList(movedWords);
        rel.addWordToList(of);
        object.removeWordsFromList(movedWords);
    }
    
    /**
     * Given an object phrase, check whether a match of REGEX.T_TO_VB_NP_NER (infinitive verb modifying a 
     * noun phrase or a named entity) starts at the first word of the object.
     * 
     * @param object: the object phrase
     * @return true if such a match starts at the first word of the object, false otherwise
     */
    public boolean pushInfinitiveVerb(Phrase object){
        TokenSequenceMatcher infMatcher = 
                TokenSequencePattern.compile(REGEX.T_TO_VB_NP_NER).getMatcher(object.getWordCoreLabelList());
        
        // Walk through the matches until one begins at the first word of the object
        boolean startsAtFirstWord = false;
        while (!startsAtFirstWord && infMatcher.find()){
            CoreLabel matchStart = new CoreLabel(infMatcher.groupNodes().get(0));
            startsAtFirstWord = matchStart.index() == object.getWordList().get(0).index();
        }
        
        return startsAtFirstWord;
    }
    
    /**
     * Checks if the adverb(s) from the object should be pushed to the relation (if the adverb is followed by 
     * preposition or 'to'). This is the case when a match of REGEX.T_RB_OPT_IN_TO_OPT starts at the first 
     * word of the object and that first word is not part of a named entity.
     * 
     * @param object: a phrase, the object of the proposition
     * @return true, if an adverb is followed by a preposition or "to"
     */
    public boolean pushAdverb(Phrase object){
        TokenSequenceMatcher advMatcher = 
                TokenSequencePattern.compile(REGEX.T_RB_OPT_IN_TO_OPT).getMatcher(object.getWordCoreLabelList());
        
        // Walk through the matches until one satisfies both conditions
        boolean push = false;
        while (!push && advMatcher.find()){
            CoreLabel matchStart = new CoreLabel(advMatcher.groupNodes().get(0));
            push = matchStart.index() == object.getWordList().get(0).index() 
                    && object.getWordList().get(0).ner().equals(NE_TYPE.NO_NER);
        }
        return push;
    }
    
    /** 
     * Given a proposition (list of annotated phrases), push words from objects to the relation if possible
     * @param prop: the proposition (list of annotated phrases)
     */
    public void pushWordsToRelation(AnnotatedProposition prop){
        // The current first word of the object (re-read after every push in phase 1)
        IndexedWord firstObjectWord = null;
        // Scratch list shared by all phases: the words to move from the object to the relation
        ObjectArrayList<IndexedWord> pushWords = new ObjectArrayList<>(); 
        
        // Nothing is done unless the triple has more than two constituents and the object is non-empty
        if (prop.getTriple().size() > 2 && prop.getObject().getWordList().size() > 0){
            firstObjectWord = prop.getObject().getWordList().get(0);

            // Phase 1: while the object starts with a word tagged IN, RB or WRB (and has more than one word),
            // move that leading word to the relation, one word per iteration
            while ((firstObjectWord != null) && 
                    (firstObjectWord.tag().equals(POS_TAG.IN) || firstObjectWord.tag().equals(POS_TAG.RB) || 
                     firstObjectWord.tag().equals(POS_TAG.WRB)) &&
                     prop.getObject().getWordList().size() > 1){
            
                // If it's an adverb, check if the adverb should be pushed; if not, phase 1 stops here
                if (firstObjectWord.tag().equals(POS_TAG.RB) && !this.pushAdverb(prop.getObject())){
                    break;
                }
                else
                    pushWords.add(firstObjectWord);
                
                // Add the word to the end of the relation, and remove it from the object
                prop.getRelation().addWordsToList(pushWords);
                prop.getObject().removeWordsFromList(pushWords);
                    
                // Re-read the (new) first object word and reset the scratch list for the next iteration
                if (prop.getObject().getWordList().size() > 0){
                    firstObjectWord = prop.getObject().getWordList().get(0);
                    pushWords.clear();
                }
                else
                    firstObjectWord = null;    
            }
            
            // Phase 2: If we have TO+ VB* .* NP .* => push TO+ VB* to the relation
            // NOTE: the matcher is built once, from the object's token list as it is at this point
            TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_TO_VP_IN);
            TokenSequenceMatcher tMatcher = tPattern.getMatcher(prop.getObject().getWordCoreLabelList());
            while (tMatcher.find()){
                List<CoreMap> matches = tMatcher.groupNodes();
                
                // Check if the first word of the matches is the same as the first object word;
                // if it is not, stop processing matches altogether (this break leaves the while loop)
                CoreLabel firstWord = new CoreLabel(matches.get(0));
                if (firstWord.index() != prop.getObject().getWordList().get(0).index())
                    break;
                
                CoreLabel lastWord = new CoreLabel(matches.get(matches.size() - 1));
                // Collect the matched words from left to right; every break below only ends this collection
                for (CoreMap cm: matches){
                    CoreLabel cl = new CoreLabel(cm);
                    if (cl.ner().equals(NE_TYPE.NO_NER)){
                        // If adverb is not followed by preposition, don't push it
                        // (i.e. an adverb that is the last word of the match ends the collection)
                        if (CoreNLPUtils.isAdverb(cl.tag())){
                            if (cl.index() == lastWord.index()){
                                break;
                            }
                        }
                        // Don't push the last word of the object
                        if (prop.getObject().getWordList().get(prop.getObject().getWordList().size() -1).index() == cl.index())
                            break;
                        // Add the pushed words to the list
                        pushWords.add(new IndexedWord(cl));
                    } else {
                        // A word carrying a NER tag ends the collection
                        break;
                    }
                }
                
                // Push the words, clear the list
                prop.getRelation().addWordsToList(pushWords);
                prop.getObject().removeWordsFromList(pushWords);
                pushWords.clear();
            }
            
            
            // Phase 3: After the pushing of the words is done, check for PPs with one of their NPs being a NER
            pushWords.clear();
            tPattern = TokenSequencePattern.compile(REGEX.T_NP_IN_OPT_DT_RB_JJ_OPT_ENTITY);
            tMatcher = tPattern.getMatcher(prop.getObject().getWordCoreLabelList());
            while (tMatcher.find()){
                List<CoreMap> matches = tMatcher.groupNodes();
                // Only matches anchored at the current first object word are used; others are skipped
                CoreLabel firstWord = new CoreLabel(matches.get(0));
                if (firstWord.index() != prop.getObject().getWordList().get(0).index())
                    continue;
                
                // Collect every matched word up to and including the first one tagged IN or TO
                // NOTE(review): if the match contains no IN/TO token, 'prep' stays an empty CoreLabel;
                // presumably its ner() is then null and the equals() call below would throw -
                // confirm that the pattern always contains a preposition
                CoreLabel prep = new CoreLabel();
                for (CoreMap cm: matches){
                    CoreLabel cl = new CoreLabel(cm);
                    if (!cl.tag().equals(POS_TAG.IN) && !cl.tag().equals(POS_TAG.TO)){
                        pushWords.add(new IndexedWord(cl));
                    } else {
                        pushWords.add(new IndexedWord(cl));
                        prep = cl;
                        break;
                    }
                }
                // Push only when the preposition itself is not part of a named entity.
                // Otherwise the collected words are left in 'pushWords' (the list is not cleared in this branch)
                if (prep.ner().equals(NE_TYPE.NO_NER)){
                    // Add the word to the end of the relation, and remove it from the object
                    prop.getRelation().addWordsToList(pushWords);
                    prop.getObject().removeWordsFromList(pushWords);
                    pushWords.clear();
                }
            }
            
            //TODO: merge this with the previous pushing rules
            // Phase 4: Check if we have NP_1 IN NP_2, but nothing else (no additional prepositions). Push NP_1 to relation
            if (CoreNLPUtils.countPrepositionsInList(prop.getObject().getWordList()) == 1){
                pushWords.clear();
                // Position of the first IN-tagged, non-NER word of the object; stays -1 if there is none
                // or if that word is the last word of the object
                int prepIndex = -1;
                for (int i = 0; i < prop.getObject().getWordList().size(); i++){
                    if (prop.getObject().getWordList().get(i).tag().equals(POS_TAG.IN) && 
                            prop.getObject().getWordList().get(i).ner().equals(NE_TYPE.NO_NER)){
                        // A preposition at the very end of the object is not pushed
                        if (prop.getObject().getWordList().get(i).index() == 
                                prop.getObject().getWordList().get(prop.getObject().getWordList().size() - 1).index()){
                            break;
                        }
                        prepIndex = i;
                        break;
                    }
                }
                // Collect everything up to and including the preposition (nothing when prepIndex is -1)
                for (int i = 0; i <= prepIndex; i++){
                    pushWords.add(prop.getObject().getWordList().get(i));
                }
                // Add the word to the end of the relation, and remove it from the object
                prop.getRelation().addWordsToList(pushWords);
                prop.getObject().removeWordsFromList(pushWords);
                pushWords.clear();
            }
        }
    }
    
    /**
     * Because of the annotations, we sometimes get duplicate propositions. Prune out the duplicates,
     * as well as the propositions with an empty subject or an empty object.
     * TODO
     */
    public void removeDuplicates(){
        // Full string renderings seen so far, used to detect exact duplicates
        ObjectOpenHashSet<String> seenRenderings = new ObjectOpenHashSet<>();
        // Word strings of all propositions whose modality is POSSIBILITY
        ObjectOpenHashSet<String> possibilityWordStrings = new ObjectOpenHashSet<>();
        // Everything scheduled for removal (may contain the same proposition more than once)
        ObjectArrayList<AnnotatedProposition> toRemove = new ObjectArrayList<>();

        // Pass 1: record the POSSIBILITY word strings and flag every repeated rendering
        for (AnnotatedProposition candidate : this.propositions) {
            if (candidate.getModality().getModalityType() == Modality.Type.POSSIBILITY) {
                possibilityWordStrings.add(candidate.propositionWordsToString());
            }
            // add() reports false when the rendering was already present, i.e. on a duplicate
            if (!seenRenderings.add(candidate.toString())) {
                toRemove.add(candidate);
            }
        }

        // Pass 2: a CERTAINTY proposition is dropped when a POSSIBILITY proposition has the same words
        // TODO: optimize this
        for (AnnotatedProposition candidate : this.propositions) {
            boolean isCertainty = candidate.getModality().getModalityType() == Modality.Type.CERTAINTY;
            if (isCertainty && possibilityWordStrings.contains(candidate.propositionWordsToString())) {
                toRemove.add(candidate);
            }
        }

        // Pass 3: also drop the propositions with an empty subject, or (for triples) an empty object
        for (AnnotatedProposition candidate : this.propositions) {
            if (candidate.getSubject().getWordList().isEmpty()) {
                toRemove.add(candidate);
            }
            if (candidate.getTriple().size() == 3 && candidate.getObject().getWordList().isEmpty()) {
                toRemove.add(candidate);
            }
        }

        this.propositions.removeAll(toRemove);
    }
    
    /**
     * Given a proposition, check if it should be pruned or not.
     * @param proposition: the proposition (list of annotated phrases: subject, relation and, optionally, object)
     * @return true, if the proposition should be pruned, false otherwise
     */
    private boolean pruneAnnotatedProposition(ObjectArrayList<AnnotatedPhrase> proposition){
        AnnotatedPhrase subject = proposition.get(0);
        AnnotatedPhrase relation = proposition.get(1);

        // A relation without a verb (this includes empty relations) or an empty subject => prune.
        // TODO: check why the verb-less case is happening! In some of these cases, the verb gets deleted
        // for some reason. This happens when CCs are being processed.
        if (!CoreNLPUtils.hasVerb(relation.getWordList()) || subject.getWordList().isEmpty()) {
            return true;
        }

        // All the remaining checks look at the object; only three-element propositions have one
        if (proposition.size() != 3) {
            return false;
        }

        AnnotatedPhrase object = proposition.get(2);
        // Empty object (shouldn't happen, but just in case)
        if (object.getWordList().isEmpty()) {
            return true;
        }

        // The last word of the object and its POS tag drive the next few rules
        IndexedWord lastObjectWord = object.getWordList().get(object.getWordList().size() - 1);
        String lastTag = lastObjectWord.tag();

        // Object ends with a preposition which is not part of a named entity
        if (lastTag.equals(POS_TAG.IN) && lastObjectWord.ner().equals(NE_TYPE.NO_NER)) {
            return true;
        }

        // Object consists of nothing but a single preposition / "to"
        boolean singleWordObject = object.getWordList().size() == 1;
        if (singleWordObject && (lastTag.equals(POS_TAG.IN) || lastTag.equals(POS_TAG.TO))) {
            return true;
        }

        // Object ends with one of the POS tags: WDT, WP, WP$ or WRB
        if (lastTag.equals(POS_TAG.WDT) || lastTag.equals(POS_TAG.WP) ||
                lastTag.equals(POS_TAG.WP_P) || lastTag.equals(POS_TAG.WRB)) {
            return true;
        }

        // Object is a clause modifier
        if (this.detectClauseModifier(proposition)) {
            return true;
        }

        // Single-word "be" relation with one NER on each side, unless the object's NER type is MISC
        // TODO: do this for implicit extractions only?
        return relation.getWordList().size() == 1
                && relation.getWordList().get(0).lemma().equals("be")
                && subject.isOneNER()
                && object.isOneNER()
                && !object.getWordList().get(0).ner().equals(NE_TYPE.MISC);
    }
    
    /**
     * Given an annotated proposition, check if it contains a clause modifier as an object. If so, return 'true', else
     * return 'false'
     * @param proposition: annotated proposition
     * @return: 'true' if the object is a clause modifier; 'false' otherwise
     */
    public boolean detectClauseModifier(ObjectArrayList<AnnotatedPhrase> proposition){
        // A proposition without a third constituent has no object, hence no clause modifier.
        // (Without this guard the lookup of element 2 below throws for two-constituent propositions.)
        if (proposition.size() < 3) {
            return false;
        }

        AnnotatedPhrase object = proposition.get(2);

        // Only objects containing a verb are candidates
        if (!CoreNLPUtils.verbInList(object.getWordList())) {
            return false;
        }

        // The object is treated as a clause modifier as soon as one of its words is attached to its
        // parent in the sentence's semantic graph through a subject relation
        for (IndexedWord word: object.getWordList()){
            IndexedWord parent = this.sentenceSemGraph.getParent(word);
            if (parent == null) {
                // Words without a parent have no incoming relation to inspect
                continue;
            }
            SemanticGraphEdge edge = this.sentenceSemGraph.getEdge(parent, word);
            if ((edge.getRelation() == EnglishGrammaticalRelations.SUBJECT) ||
                (edge.getRelation() == EnglishGrammaticalRelations.NOMINAL_SUBJECT) ||
                (edge.getRelation() == EnglishGrammaticalRelations.CLAUSAL_SUBJECT) ||
                (edge.getRelation() == EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT)){
                return true;
            }
        }
        return false;
    }
    
    /** Sets the polarity of each annotated proposition **/
    public void setPolarity(){
        // Set polarity according to relations only
        for (AnnotatedProposition prop : this.propositions) {
            // In some cases, there's only one word, in which case we don't drop anything
            // (such propositions keep whatever polarity they already have)
            if (prop.getRelation().getWordList().size() == 1) {
                continue;
            }

            // Determine the polarity from the relation and the sentence's semantic graph
            Polarity pol = Polarity.getPolarity(prop.getRelation(), this.sentenceSemGraph);
            prop.setPolarity(pol);

            // If the polarity is negative, drop the negative words from the relation
            prop.getRelation().removeWordsFromList(pol.getNegativeWords());
        }
    }
    
    /** Set the modality for all annotated propositions */
    public void setModality(){
        // Set modality according to relations only
        for (AnnotatedProposition prop : this.propositions) {
            // In some cases, there's only one word, in which case we don't drop anything
            // (such propositions keep whatever modality they already have)
            if (prop.getRelation().getWordList().size() == 1) {
                continue;
            }

            // Determine the modality from the relation and the sentence's semantic graph
            Modality mod = Modality.getModality(prop.getRelation(), this.sentenceSemGraph);
            prop.setModality(mod);

            // If the modality is poss/cert, drop those words from the relation
            prop.getRelation().removeWordsFromList(mod.getPossibilityWords());
            prop.getRelation().removeWordsFromList(mod.getCertaintyWords());
        }
    }
    
    /**
     * Set the semantic graph of the sentence
     * @param sg: the semantic graph to be stored in this object
     */
    public void setSemanticGraph(final SemanticGraph sg){
        sentenceSemGraph = sg;
    }
    
    /**
     * Add a word to the relation of the proposition at index 'i'
     * @param i: the index of the proposition
     * @param word: the indexed word to be added to the relation
     */
    public void addWordToRelation(int i, IndexedWord word){
        // Look up the proposition first, then add the word to its relation
        AnnotatedProposition target = this.propositions.get(i);
        target.getRelation().addWordToList(word);
    }
    
    /**
     * Remove the first word from object in proposition 'i'
     * @param i: index of the proposition
     */
    public void removeFirstWordFromObject(int i){
        // Look up the proposition first, then remove the word at position 0 of its object
        AnnotatedProposition target = this.propositions.get(i);
        target.getObject().removeWordFromList(0);
    }
    
    /** Dictionary mode minimization **/
    public void minimizeDictionaryMode(ObjectOpenHashSet<String> collocations){
        // Run the dictionary minimizers over subject, relation and object of every proposition, in that order
        int propIndex = 0;
        while (propIndex < this.propositions.size()) {
            SubjDictionaryMinimization.minimizeSubject(
                    this.getSubject(propIndex), this.sentenceSemGraph, collocations);
            RelDictionaryMinimization.minimizeRelation(
                    this.getRelation(propIndex), this.sentenceSemGraph, collocations);
            ObjDictionaryMinimization.minimizeObject(
                    this.getObject(propIndex), this.sentenceSemGraph, collocations);
            propIndex++;
        }

        // Once all constituents are minimized, run the word-pushing step
        this.pushWordsToRelationsInPropositions();
    }
    
    /** Safe mode minimization **/
    public void minimizeSafeMode(){
        // Run the safe minimizers over subject, relation and object of every proposition, in that order
        int propIndex = 0;
        while (propIndex < this.propositions.size()) {
            SubjSafeMinimization.minimizeSubject(this.getSubject(propIndex), this.sentenceSemGraph);
            RelSafeMinimization.minimizeRelation(this.getRelation(propIndex), this.sentenceSemGraph);
            ObjSafeMinimization.minimizeObject(this.getObject(propIndex), this.sentenceSemGraph);
            propIndex++;
        }

        // Once all constituents are minimized, run the word-pushing step
        this.pushWordsToRelationsInPropositions();
    }
    
    /** Aggressive mode minimization **/
    public void minimizeAggressiveMode() {
        // Run the aggressive minimizers over subject, relation and object of every proposition, in that order
        int propIndex = 0;
        while (propIndex < this.propositions.size()) {
            SubjAggressiveMinimization.minimizeSubject(this.getSubject(propIndex), this.sentenceSemGraph);
            RelAggressiveMinimization.minimizeRelation(this.getRelation(propIndex), this.sentenceSemGraph);
            ObjAggressiveMinimization.minimizeObject(this.getObject(propIndex), this.sentenceSemGraph);
            propIndex++;
        }

        // Once all constituents are minimized, run the word-pushing step
        this.pushWordsToRelationsInPropositions();
    }
}