Java Code Examples for edu.stanford.nlp.ling.CoreLabel#set()

The following examples show how to use edu.stanford.nlp.ling.CoreLabel#set(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: GalicianReadability.java From tint with GNU General Public License v3.0

6 votes

/**
 * Tags a content word with a difficulty level and updates the per-level counters.
 *
 * <p>The token starts at level 4. Each of the model's level-3, level-2 and level-1
 * lemma collections is then checked independently, in that order: every collection
 * containing the lemma bumps its own counter, and the last matching check decides
 * the level left on the token.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    final Class<ReadabilityAnnotations.DifficultyLevelAnnotation> levelKey =
            ReadabilityAnnotations.DifficultyLevelAnnotation.class;

    // Default level, overwritten below when the lemma appears in one of the lists.
    token.set(levelKey, 4);
    final String tokenLemma = token.lemma();

    if (model.getLevel3Lemmas().contains(tokenLemma)) {
        level3WordSize++;
        token.set(levelKey, 3);
    }

    if (model.getLevel2Lemmas().contains(tokenLemma)) {
        level2WordSize++;
        token.set(levelKey, 2);
    }

    if (model.getLevel1Lemmas().contains(tokenLemma)) {
        level1WordSize++;
        token.set(levelKey, 1);
    }
}

Example 2

Source File: ItalianReadability.java From tint with GNU General Public License v3.0

6 votes

/**
 * Tags a content word with a difficulty level and updates the per-level counters.
 *
 * <p>The lookup uses the model's easy-word table: level number, then generic
 * part of speech, then lemma. The token starts at level 4; levels 3, 2 and 1
 * are checked independently in that order, each match bumping its own counter,
 * and the last matching check decides the level left on the token.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    final HashMap<Integer, HashMultimap<String, String>> wordsByLevel = model.getEasyWords();
    final String coarsePos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
    final String tokenLemma = token.get(CoreAnnotations.LemmaAnnotation.class);

    final Class<ReadabilityAnnotations.DifficultyLevelAnnotation> levelKey =
            ReadabilityAnnotations.DifficultyLevelAnnotation.class;

    // Default level, overwritten below when the lemma is listed as an easy word.
    token.set(levelKey, 4);

    if (wordsByLevel.get(3).get(coarsePos).contains(tokenLemma)) {
        level3WordSize++;
        token.set(levelKey, 3);
    }

    if (wordsByLevel.get(2).get(coarsePos).contains(tokenLemma)) {
        level2WordSize++;
        token.set(levelKey, 2);
    }

    if (wordsByLevel.get(1).get(coarsePos).contains(tokenLemma)) {
        level1WordSize++;
        token.set(levelKey, 1);
    }
}

Example 3

Source File: DigiCompMorphAnnotator.java From tint with GNU General Public License v3.0

5 votes

/**
 * Derives the "morpho-comp" annotation for every token of every sentence.
 *
 * <p>The token's morpho annotation is split on single spaces, and the analyses
 * that start with {@code lemma + "+"} or {@code lemma + "~"} are collected.
 * When the annotation holds more than one analysis the collected list is always
 * stored (possibly empty); when it holds exactly one, the list is stored only
 * if that analysis matched.
 *
 * <p>Tokens without a morpho annotation, or whose annotation splits into no
 * analyses at all (e.g. a string consisting only of spaces), are skipped
 * instead of raising {@link NullPointerException} /
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param annotation the document annotation; left untouched when it carries no sentences
 */
@Override
public void annotate(Annotation annotation) {
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            for (CoreLabel c : tokens) {
                String morpho = c.get(DigiMorphAnnotations.MorphoAnnotation.class);
                if (morpho == null) {
                    // Nothing to derive from; leave the token as it is.
                    continue;
                }

                // String.split drops trailing empty strings, so this array can be empty.
                String[] morphFeatures = morpho.split(" ");
                String lemma = c.get(CoreAnnotations.LemmaAnnotation.class);
                String plusPrefix = lemma + "+";
                String tildePrefix = lemma + "~";

                List<String> comps = new ArrayList<>();
                for (String m : morphFeatures) {
                    if (m.startsWith(plusPrefix) || m.startsWith(tildePrefix)) {
                        comps.add(m);
                    }
                }

                // Several analyses: always store the (possibly empty) list.
                // Single analysis: store it only when it matched the lemma.
                if (morphFeatures.length > 1 || !comps.isEmpty()) {
                    c.set(DigiMorphAnnotations.MorphoCompAnnotation.class, comps);
                }
            }
        }
    }
}

Example 4

Source File: DigiInverseMorphAnnotator.java From tint with GNU General Public License v3.0

5 votes

/**
 * Copies the first space-separated analysis of each token's morpho annotation
 * into the token's inverse-morpho annotation.
 *
 * <p>Tokens without a morpho annotation, or whose annotation splits into no
 * analyses at all (e.g. a string consisting only of spaces), are skipped
 * instead of raising {@link NullPointerException} /
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param annotation the document annotation; left untouched when it carries no sentences
 */
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel c : tokens) {
            String morpho = c.get(DigiMorphAnnotations.MorphoAnnotation.class);
            if (morpho == null) {
                continue;
            }

            // String.split drops trailing empty strings, so this array can be empty.
            String[] morphFeatures = morpho.split(" ");
            if (morphFeatures.length == 0) {
                continue;
            }

            c.set(DigiInverseMorphAnnotations.InverseMorphoAnnotation.class, morphFeatures[0]);
        }
    }
}

Example 5

Source File: SpanishReadability.java From tint with GNU General Public License v3.0

5 votes

/**
 * Tags a content word with a difficulty level and updates the per-level counters.
 *
 * <p>The token starts at level 4. The model's level-3, level-2 and level-1 lemma
 * collections are then consulted independently, in that order; each collection
 * that contains the lemma increments its own counter, and the last matching
 * check determines the level that stays on the token.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    final Class<ReadabilityAnnotations.DifficultyLevelAnnotation> difficultyKey =
            ReadabilityAnnotations.DifficultyLevelAnnotation.class;

    // Start from the default level; matches below overwrite it.
    token.set(difficultyKey, 4);
    final String currentLemma = token.lemma();

    if (model.getLevel3Lemmas().contains(currentLemma)) {
        level3WordSize++;
        token.set(difficultyKey, 3);
    }

    if (model.getLevel2Lemmas().contains(currentLemma)) {
        level2WordSize++;
        token.set(difficultyKey, 2);
    }

    if (model.getLevel1Lemmas().contains(currentLemma)) {
        level1WordSize++;
        token.set(difficultyKey, 1);
    }
}

Example 6

Source File: UPosAnnotator.java From tint with GNU General Public License v3.0

5 votes

/**
 * Assigns a universal POS annotation to every token of the document.
 *
 * <p>The token's POS tag is split on {@code '+'}; each part is looked up in
 * {@code uposMap} (falling back to {@code DEFAULT_UPOS} for unknown parts) and
 * the mapped values are joined back together with {@code '+'}.
 *
 * <p>A tag made only of {@code '+'} characters splits into no parts at all;
 * such a token receives {@code DEFAULT_UPOS} rather than triggering a
 * {@link StringIndexOutOfBoundsException}.
 *
 * @param annotation the document annotation whose tokens are tagged
 */
@Override
public void annotate(Annotation annotation) {
    for (CoreLabel token : annotation.get(CoreAnnotations.TokensAnnotation.class)) {
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);

        // String.split drops trailing empty strings, so this array can be empty.
        String[] parts = pos.split("\\+");

        // Method-local buffer: no need for the synchronized StringBuffer.
        StringBuilder upos = new StringBuilder();
        for (int i = 0; i < parts.length; i++) {
            if (i > 0) {
                upos.append('+');
            }
            String thisPos = uposMap.getOrDefault(parts[i], DEFAULT_UPOS);
            upos.append(thisPos);
        }

        String value = parts.length == 0 ? DEFAULT_UPOS : upos.toString();
        token.set(CustomAnnotations.UPosAnnotation.class, value);
    }
}

Example 7

Source File: ProcessorTools.java From phrasal with GNU General Public License v3.0

5 votes

/**
 * Builds a {@link CoreLabel} describing a single labeled character.
 *
 * @param character   stored as both the text and the char annotation
 * @param label       stored as both the answer and the gold-answer annotation
 * @param index       index assigned to the label via {@code setIndex}
 * @param parentToken stored as the parent annotation
 * @param charIndex   stored as the character-offset-begin annotation
 * @return the newly created label
 */
private static CoreLabel createDatum(String character, String label, int index, String parentToken, int charIndex) {
  final CoreLabel datum = new CoreLabel();

  // The character itself, plus the token it was taken from.
  datum.set(CoreAnnotations.TextAnnotation.class, character);
  datum.set(CoreAnnotations.CharAnnotation.class, character);
  datum.set(CoreAnnotations.ParentAnnotation.class, parentToken);

  // The same label serves as both the answer and the gold answer.
  datum.set(CoreAnnotations.AnswerAnnotation.class, label);
  datum.set(CoreAnnotations.GoldAnswerAnnotation.class, label);

  // Positional information.
  datum.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charIndex);
  datum.setIndex(index);

  return datum;
}

Example 8

Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0

5 votes

/**
 * Flags every token with a pair of booleans: whether its word form is a
 * stopword and whether its lemma is a stopword.
 *
 * <p>Nothing is done when no stopword set is configured (null or empty) or
 * when the annotation carries no tokens. The lemma flag is only computed when
 * {@code checkLemma} is enabled; otherwise it is always {@code false}. A token
 * without a lemma is treated as "lemma is not a stopword".
 *
 * @param annotation the document annotation whose tokens are flagged
 */
@Override
public void annotate(Annotation annotation) {
    if (stopwords != null && stopwords.size() > 0 && annotation.containsKey(TokensAnnotation.class)) {
        List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            boolean isWordStopword = stopwords.contains(token.word().toLowerCase());

            // The lemma check must look at the lemma; it previously re-tested the
            // word form, so the second flag merely duplicated the first.
            String lemma = checkLemma ? token.lemma() : null;
            boolean isLemmaStopword = lemma != null && stopwords.contains(lemma.toLowerCase());

            Pair<Boolean, Boolean> pair = Pair.makePair(isWordStopword, isLemmaStopword);
            token.set(StopwordAnnotator.class, pair);
        }
    }
}

Example 9

Source File: CoreNLPHelper.java From Heracles with GNU General Public License v3.0

4 votes

/**
 * Rebuilds a single-sentence Stanford {@link Annotation} from a sentence span.
 *
 * <p>Each word of the span becomes a {@link CoreLabel} carrying its text,
 * character offsets, a token index of {@code getOrder() + 1}, sentence index 0
 * and the fixed document id {@code "document"}; the "pos", "lemma", "nerLabel"
 * and "nerValue" annotations are copied over when the word has them. As a side
 * effect every word is registered in {@code wordIndex}.
 *
 * @param sentenceSpan the sentence to convert; its "text" annotation supplies the document text
 * @param wordIndex map filled with the span's words, keyed as selected by the flag below
 * @param useWordOrderInsteadOfOffset key {@code wordIndex} by word order when true,
 *        by start offset otherwise
 * @return the reconstructed document annotation
 */
public static Annotation reconstructStanfordAnnotations(Span sentenceSpan, HashMap<Integer, Word> wordIndex, boolean useWordOrderInsteadOfOffset) {
	final String text = sentenceSpan.getAnnotation("text", String.class);

	final Annotation document = new Annotation(text);
	document.set(TextAnnotation.class, text);

	final List<CoreMap> sentences = new ArrayList<CoreMap>();
	document.set(SentencesAnnotation.class, sentences);

	final List<CoreLabel> tokens = new ArrayList<CoreLabel>();
	document.set(TokensAnnotation.class, tokens);

	final ArrayCoreMap sentence = new ArrayCoreMap();
	sentences.add(sentence);

	for (Word word : sentenceSpan) {
		final CoreLabel token = new CoreLabel();

		// Surface form and character span.
		token.set(TextAnnotation.class, word.getWord());
		token.set(OriginalTextAnnotation.class, word.getWord());
		token.set(ValueAnnotation.class, word.getWord());
		token.set(CharacterOffsetBeginAnnotation.class, word.getStartOffset());
		token.set(CharacterOffsetEndAnnotation.class, word.getEndOffset());

		// Position of the token inside the (single) sentence and document.
		token.set(IndexAnnotation.class, word.getOrder() + 1);
		token.set(SentenceIndexAnnotation.class, 0);
		token.set(DocIDAnnotation.class, "document");
		token.setDocID("document");

		// Optional linguistic annotations, copied only when present on the word.
		if (word.hasAnnotation("pos")) {
			token.set(PartOfSpeechAnnotation.class, word.getAnnotation("pos", String.class));
		}
		if (word.hasAnnotation("lemma")) {
			token.set(LemmaAnnotation.class, word.getAnnotation("lemma", String.class));
		}
		if (word.hasAnnotation("nerLabel")) {
			token.set(NamedEntityTagAnnotation.class, word.getAnnotation("nerLabel", String.class));
		}
		if (word.hasAnnotation("nerValue")) {
			token.set(NormalizedNamedEntityTagAnnotation.class, word.getAnnotation("nerValue", String.class));
		}

		tokens.add(token);

		// Register the word so callers can map tokens back to it.
		if (useWordOrderInsteadOfOffset) {
			wordIndex.put(word.getOrder(), word);
		} else {
			wordIndex.put(word.getStartOffset(), word);
		}
	}

	// Sentence-level annotations: tokens, text and sentence index come first.
	sentence.set(TokensAnnotation.class, tokens);
	sentence.set(TextAnnotation.class, text);
	sentence.set(SentenceIndexAnnotation.class, 0);

	// Character and token extents of the sentence.
	sentence.set(CharacterOffsetBeginAnnotation.class, 0);
	sentence.set(CharacterOffsetEndAnnotation.class, sentenceSpan.last().getEndOffset());
	sentence.set(TokenBeginAnnotation.class, 0);
	sentence.set(TokenEndAnnotation.class, sentenceSpan.last().getOrder());

	return document;
}

Example 10

Source File: Readability.java From tint with GNU General Public License v3.0

4 votes

/**
 * Marks the given token as a content word.
 *
 * @param token the token to flag
 */
public void addingContentWord(CoreLabel token) {
    final Class<ReadabilityAnnotations.ContentWord> contentWordKey =
            ReadabilityAnnotations.ContentWord.class;
    token.set(contentWordKey, Boolean.TRUE);
}

Example 11

Source File: Readability.java From tint with GNU General Public License v3.0

4 votes

/**
 * Marks the given token as a literal word.
 *
 * @param token the token to flag
 */
public void addingWord(CoreLabel token) {
    final Class<ReadabilityAnnotations.LiteralWord> literalWordKey =
            ReadabilityAnnotations.LiteralWord.class;
    token.set(literalWordKey, Boolean.TRUE);
}

Example 12

Source File: Readability.java From tint with GNU General Public License v3.0

4 votes

/**
 * Registers one token with the readability statistics.
 *
 * <p>The token is first flagged as neither content word nor literal word, then
 * passed to {@code addingToken}. If its POS tag satisfies {@code isWordPos},
 * the word counters are updated, the ASCII-flattened word is hyphenated, and
 * the {@code addingWord}, {@code addingContentWord} and {@code addingEasyWord}
 * hooks are invoked as applicable. Every token ends up with a hyphenation
 * annotation (falling back to its original text), and its POS and generic POS
 * are added to the POS statistics.
 *
 * @param token the token to register; it is annotated in place
 */
public void addWord(CoreLabel token) {
        // Reset both flags; the adding* hooks called below may flip them.
        token.set(ReadabilityAnnotations.ContentWord.class, false);
        token.set(ReadabilityAnnotations.LiteralWord.class, false);

        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
//        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String word = token.word();

        // Hook called for every token, whether or not it counts as a word.
        addingToken(token);

        if (isWordPos(pos)) {
            addingWord(token);
            wordCount++;
            // Length taken from the token's begin/end positions, not from the word string.
            docLenLettersOnly += token.endPosition() - token.beginPosition();

            // Hyphenation is computed on the ASCII-flattened form of the word.
            word = flattenToAscii(word);
            Hyphenation hyphenation = hyphenator.hyphenate(word);

            // Becomes true once a hyphenation has been recorded on the token.
            boolean done = false;
            if (hyphenation != null) {
                try {
                    String h = hyphenation.toString();
                    // NOTE(review): length() + 1 presumably turns break points into a
                    // segment count - confirm against the Hyphenation API.
                    incrementHyphenCount(hyphenation.length() + 1);
                    token.set(ReadabilityAnnotations.HyphenationAnnotation.class, h);
                    done = true;
                    hyphenWordCount++;
                } catch (Exception e) {
                    // Any failure is swallowed; if it happened before `done` was set,
                    // the short-word fallback below may still apply.
                }
            }

            // Fallback for words with no recorded hyphenation: only words shorter
            // than five characters are counted, with a hyphen count of 1.
            if (!done && word.length() < 5) {
                incrementHyphenCount(1);
                hyphenWordCount++;
            }

            // The content and easy checks are independent; a token can trigger both.
            if (isContentPos(pos)) {
                contentWordSize++;
                addingContentWord(token);
            }
            if (isEasyPos(pos)) {
                contentEasyWordSize++;
                addingEasyWord(token);
            }
        }
        // Guarantee a hyphenation annotation on every token, including non-words.
        if (token.get(ReadabilityAnnotations.HyphenationAnnotation.class) == null) {
            token.set(ReadabilityAnnotations.HyphenationAnnotation.class, token.originalText());
        }

        // Record both the fine-grained and the generic POS in the statistics.
        String genericPos = getGenericPos(pos);
        posStats.add(pos);
        genericPosStats.add(genericPos);
    }