Java Code Examples for edu.stanford.nlp.ling.IndexedWord#setOriginalText()

The following examples show how to use edu.stanford.nlp.ling.IndexedWord#setOriginalText(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0

5 votes

/**
 * Replaces the current relation with a fresh phrase that holds a single synthetic "is" token
 * (lemma "be", tag VBZ, no named-entity type, index -2); that token is also the phrase root.
 */
public void setIsARelation() {
    IndexedWord copula = new IndexedWord();
    // The word is set before the lemma on purpose: CoreNLP's setWord may discard a lemma set earlier
    copula.setWord("is");
    copula.setOriginalText("is");
    copula.setTag(POS_TAG.VBZ);
    copula.setNER(NE_TYPE.NO_NER);
    copula.setLemma("be");
    copula.setValue("is");
    copula.setIndex(-2);

    AnnotatedPhrase isARelation = new AnnotatedPhrase();
    isARelation.addWordToList(copula);
    isARelation.setRoot(copula);
    this.rel = isARelation;
}

Example 2

Source File: MinIE.java From minie with GNU General Public License v3.0

4 votes

/**
 * Process possessives in the object.
 * If the whole object matches the "NP POS NP" token pattern, the possessive marker is removed, the words
 * that followed it are appended to the relation together with a synthetic "of", and only the words that
 * preceded the marker stay in the object. E.g. ("SUBJ", "REL", "X 's Y") becomes ("SUBJ", "REL Y of", "X").
 * Propositions with fewer than three phrases (no object) are left untouched.
 * @param prop: proposition (list of annotated phrases); the relation (index 1) and the object (index 2)
 *              are modified in place
 */
public void processPoss(ObjectArrayList<AnnotatedPhrase> prop){
    // If there's no object (clause type SV), return
    if (prop.size() < 3)
        return;
    
    AnnotatedPhrase object = prop.get(2);
    AnnotatedPhrase rel = prop.get(1);
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_NP_POS_NP);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(object.getWordCoreLabelList());
    
    // Position of the possessive marker inside the object; -1 while none has been found
    int posIndex = -1;
    
    while (tMatcher.find()){         
        List<CoreMap> match = tMatcher.groupNodes();
        
        // Check if the first/last word of the match is the first/last word of the object
        CoreLabel firstWord = new CoreLabel(match.get(0));
        CoreLabel lastWord = new CoreLabel(match.get(match.size() - 1));
        boolean check = false;
        if (firstWord.index() == object.getWordList().get(0).index()){
            if (lastWord.index() == object.getWordList().get(object.getWordList().size() - 1).index()){
                check = true;
            }
        }
        // A match that does not span the whole object ends the search
        if (!check) break;
        
        // Locate the possessive marker that is not part of a named entity
        for (CoreMap cm: match){
            CoreLabel cl = new CoreLabel(cm);
            if (cl.tag().equals(POS_TAG.POS) && (cl.ner().equals(NE_TYPE.NO_NER))){
                posIndex = object.getWordCoreLabelList().indexOf(cl);
                break;
            }
        }
    }
    
    if (posIndex > -1){
        IndexedWord of = new IndexedWord();
        // The word must be set before the lemma: CoreNLP's setWord drops an already-set lemma
        // whenever the word text changes, which would leave this token without a lemma.
        of.setWord("of");
        of.setOriginalText("of");
        of.setLemma("of");
        of.setTag("IN");
        of.setNER("O");
        of.setIndex(-1);
        
        ObjectArrayList<IndexedWord> pushedWords = new ObjectArrayList<>();
        // Drop the possessive marker; the words that followed it now start at posIndex
        object.removeWordFromList(posIndex);
        for (int i = posIndex; i < object.getWordList().size(); i++){
            pushedWords.add(object.getWordList().get(i));
        }
        // Move those words into the relation, followed by "of"
        rel.addWordsToList(pushedWords);
        rel.addWordToList(of);
        object.removeWordsFromList(pushedWords);
    }
}

Example 3

Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0

4 votes

/**
 * If   ORG+ POS? NP PERSON+ => "PERSON" "is NP of" "ORG" (if there are , and or -> make multiple extractions).
 * For every match of the pattern in the sentence, PERSON tokens form the subject, ORGANIZATION tokens form
 * the object, possessive markers are skipped, and the remaining tokens extend an "is" relation that is
 * closed with a synthetic "of". A comma, "and" or "or" ends the current subject and starts a new one, so
 * one proposition is added per collected subject.
 **/
public void extractPersonIsNPOfOrg() {
    // Reusable variables
    ObjectArrayList<AnnotatedPhrase> tempProp = new ObjectArrayList<>();
    ObjectArrayList<AnnotatedPhrase> subjects = new ObjectArrayList<>();
    IndexedWord subjRoot;
    IndexedWord objRoot;
    
    this.tPattern = TokenSequencePattern.compile(REGEX.T_ORG_NP_PERSON);
    this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence));
    while (this.tMatcher.find()){    
        // Set the relation to be "is-a" relation
        this.setIsARelation();
        
        for (IndexedWord w: CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes())) {
            if (w.ner().equals(NE_TYPE.PERSON))
                this.subj.addWordToList(w);
            else if (w.ner().equals(NE_TYPE.ORGANIZATION))
                this.obj.addWordToList(w);
            else if (w.tag().equals(POS_TAG.POS))
                continue;
            else if (w.lemma().equals(CHARACTER.COMMA) || w.lemma().equals("and") || w.lemma().equals("or")) {
                // A separator closes the subject collected so far and starts a new one
                subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
                subjects.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
                this.subj.clear();
            }
            else this.rel.addWordToList(w);
        }
        // Close the last (or only) subject of this match
        subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
        subjects.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
        objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList());
        
        // Synthetic "of" that ends the relation ("is NP of")
        IndexedWord ofWord = new IndexedWord();
        ofWord.setWord("of");
        ofWord.setOriginalText("of");
        ofWord.setTag(POS_TAG.IN);
        ofWord.setNER(NE_TYPE.NO_NER);
        ofWord.setLemma("of");
        ofWord.setValue("of");
        ofWord.setIndex(-2);
        this.rel.addWordToList(ofWord);
        
        for (AnnotatedPhrase subject: subjects) {
            // Add the subj/rel/obj to the temporary proposition and then to the real propositions
            subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, subject.getWordList());
            tempProp.add(new AnnotatedPhrase(subject.getWordList(), subjRoot));
            tempProp.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot()));
            tempProp.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot));
            this.propositions.add(new AnnotatedProposition(tempProp.clone(), new Attribution()));
            tempProp.clear();
        }
        
        // Clean the variables. The collected subjects must be reset as well; otherwise the next match
        // would emit the subjects of earlier matches again, paired with its own relation and object.
        subjects.clear();
        this.subj.clear();
        this.obj.clear();
        this.rel.clear();
    }
}

Example 4

Source File: ImplicitExtractions.java From minie with GNU General Public License v3.0

4 votes

/**
 * If (NP+ PERSON) => "PERSON" "is" "NP".
 * For every match of the pattern, PERSON tokens become the subject and the other tokens become the object.
 * The titles mrs./ms./mrs/ms are replaced by a synthetic "female" noun, mr./mr by a synthetic "male" noun,
 * and tokens whose lower-cased lemma is in Polarity.NEG_WORDS are left out.
 **/
public void extractNounPerson() {
    ObjectArrayList<AnnotatedPhrase> triple = new ObjectArrayList<>();

    // A single "is" relation is shared by all matches and cleared once the loop is done
    this.setIsARelation();

    this.tPattern = TokenSequencePattern.compile(REGEX.T_NP_PERSON);
    this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence));
    while (this.tMatcher.find()) {
        for (IndexedWord token : CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes())) {
            if (token.ner().equals(NE_TYPE.PERSON)) {
                this.subj.addWordToList(token);
                continue;
            }

            String lemma = token.lemma().toLowerCase();
            switch (lemma) {
                case "mrs.":
                case "ms.":
                case "mrs":
                case "ms":
                    this.obj.addWordToList(buildSyntheticNoun("female"));
                    break;
                case "mr.":
                case "mr":
                    this.obj.addWordToList(buildSyntheticNoun("male"));
                    break;
                default:
                    // Negation words are not carried over into the object
                    if (!Polarity.NEG_WORDS.contains(lemma)) {
                        this.obj.addWordToList(token);
                    }
                    break;
            }
        }

        // Assemble subject / relation / object and store the proposition
        IndexedWord subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
        IndexedWord objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList());
        triple.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
        triple.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot()));
        triple.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot));
        this.propositions.add(new AnnotatedProposition(triple.clone(), new Attribution()));

        // Reset the per-match state
        triple.clear();
        this.subj.clear();
        this.obj.clear();
    }

    // The relation is no longer needed
    this.rel.clear();
}

/** Builds a synthetic NN token (no named-entity type, index -2) whose word, lemma and value are all {@code text}. */
private IndexedWord buildSyntheticNoun(String text) {
    IndexedWord noun = new IndexedWord();
    // Word first, lemma afterwards: CoreNLP's setWord may discard a lemma set earlier
    noun.setWord(text);
    noun.setOriginalText(text);
    noun.setTag(POS_TAG.NN);
    noun.setNER(NE_TYPE.NO_NER);
    noun.setLemma(text);
    noun.setValue(text);
    noun.setIndex(-2);
    return noun;
}