edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation Java Examples

The following examples show how to use edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CoreNlpTokenizer.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
@Override
public boolean incrementToken() {
    clearAttributes();
    while (tokens == null || !tokens.hasNext())
        if (!getNextSentence())
            return false;
    CoreLabel token = tokens.next();
    // Use the lemmatized word:
    String word = token.get(LemmaAnnotation.class);
    if (word == null) { // Fallback when no lemmatization happens.
        word = token.get(TextAnnotation.class);
    }
    termAttribute.setLength(0);
    termAttribute.append(word);
    // NER or part of speech annotation
    String pos = token.get(NamedEntityTagAnnotation.class);
    pos = (pos == null || "O".equals(pos)) ? token.get(PartOfSpeechAnnotation.class) : pos;
    typeAttribute.setType(pos != null ? pos : TypeAttribute.DEFAULT_TYPE);
    // Token character offsets
    int be = token.get(CharacterOffsetBeginAnnotation.class).intValue();
    int en = token.get(CharacterOffsetEndAnnotation.class).intValue();
    offsetAttribute.setOffset(be, en);
    // Token in-document position increment:
    positionAttribute.setPositionIncrement(1 + skippedTokens);
    skippedTokens = 0;
    return true;
}
 
Example #2
Source File: CoreNLPHelper.java    From Heracles with GNU General Public License v3.0 4 votes vote down vote up
public static Annotation reconstructStanfordAnnotations(Span sentenceSpan, HashMap<Integer, Word> wordIndex, boolean useWordOrderInsteadOfOffset){
		String originalText = sentenceSpan.getAnnotation("text", String.class); 
		Annotation a = new Annotation(originalText);
		a.set(TextAnnotation.class, originalText);
		
		//a.set(DocIDAnnotation.class, "document");
		
		List<CoreMap> sentenceAnnotations = new ArrayList<CoreMap>();
		a.set(SentencesAnnotation.class, sentenceAnnotations);
		List<CoreLabel> tokenAnnotations = new ArrayList<CoreLabel>();
		a.set(TokensAnnotation.class, tokenAnnotations);
		
		ArrayCoreMap sentenceAnnotation = new ArrayCoreMap();
		sentenceAnnotations.add(sentenceAnnotation);
		
//		int startOffset = sentenceSpan.first().getStartOffset();
		
		for (Word w : sentenceSpan){
			CoreLabel c = new CoreLabel();
			c.set(TextAnnotation.class, w.getWord());
			c.set(OriginalTextAnnotation.class, w.getWord());
			c.set(ValueAnnotation.class, w.getWord());
			c.set(CharacterOffsetBeginAnnotation.class, w.getStartOffset());
			c.set(CharacterOffsetEndAnnotation.class, w.getEndOffset());
			
			
			c.set(IndexAnnotation.class, w.getOrder()+1);
//			c.setIndex(w.getOrder());
			
			c.set(SentenceIndexAnnotation.class, 0);
//			c.setSentIndex(0);
			
			c.set(DocIDAnnotation.class, "document");
			c.setDocID("document");
			
			if (w.hasAnnotation("pos"))
				c.set(PartOfSpeechAnnotation.class, w.getAnnotation("pos",String.class));
			
			if (w.hasAnnotation("lemma"))
				c.set(LemmaAnnotation.class, w.getAnnotation("lemma", String.class));
			
			if (w.hasAnnotation("nerLabel"))
				c.set(NamedEntityTagAnnotation.class, w.getAnnotation("nerLabel", String.class));
			
			if (w.hasAnnotation("nerValue"))
				c.set(NormalizedNamedEntityTagAnnotation.class, w.getAnnotation("nerValue", String.class));
			
			tokenAnnotations.add(c);
			if (useWordOrderInsteadOfOffset){
				wordIndex.put(w.getOrder(), w);
			} else {
				wordIndex.put(w.getStartOffset(), w);
			}
		}
		//essential sentence annotation: TokensAnnotation
		sentenceAnnotation.set(TokensAnnotation.class, tokenAnnotations);
		//essential sentence annotation: TextAnnotation
		sentenceAnnotation.set(TextAnnotation.class, originalText);
		//essential sentence annotation: SentenceIndexAnnotation
		sentenceAnnotation.set(SentenceIndexAnnotation.class, 0);
		
		sentenceAnnotation.set(CharacterOffsetBeginAnnotation.class, 0);
		sentenceAnnotation.set(CharacterOffsetEndAnnotation.class, sentenceSpan.last().getEndOffset());
		sentenceAnnotation.set(TokenBeginAnnotation.class, 0);
		sentenceAnnotation.set(TokenEndAnnotation.class, sentenceSpan.last().getOrder());
		
		return a;
	}
 
Example #3
Source File: JsonPipeline.java    From tac2015-event-detection with GNU General Public License v3.0 4 votes vote down vote up
@SuppressWarnings({ "rawtypes", "unchecked" })
	static void addEntityMentions(Map<String,Object> sent_info, CoreMap sentence) {
        List<CoreMap> coreMentions = sentence.get(MentionsAnnotation.class);
        List<Map> jsonMentions = new ArrayList<>();
        /* trying to figure out the keys in each mention. here's a printout from one.
MENTION August 2014
class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation	August 2014
class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation	3
class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation	14
class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation	[August-2, 2014-3]
class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation	1
class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation	3
class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation	DATE
class edu.stanford.nlp.ling.CoreAnnotations$NormalizedNamedEntityTagAnnotation	2014-08
class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation	DATE
class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation	0
class edu.stanford.nlp.time.TimeAnnotations$TimexAnnotation	<TIMEX3 tid="t1" type="DATE" value="2014-08">August 2014</TIMEX3>
MENTION Barack Obama
class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation	Barack Obama
class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation	17
class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation	29
class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation	[Barack-5, Obama-6]
class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation	4
class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation	6
class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation	PERSON
class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation	PERSON
class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation	0
MENTION Paris
class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation	Paris
class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation	66
class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation	71
class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation	[Paris-5]
class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation	14
class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation	15
class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation	LOCATION
class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation	LOCATION
class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation	1
         */
        for (CoreMap mention : coreMentions) {
//            U.p("MENTION " + mention);
//        	for (Class k : mention.keySet()) {
//        		U.pf("%s\t%s\n", k, mention.get(k));
//        	}
            Map m = new HashMap<String, Object>();
            m.put("tokspan", Lists.newArrayList(
            		mention.get(TokenBeginAnnotation.class).intValue(),
            		mention.get(TokenEndAnnotation.class).intValue()));
            m.put("charspan", Lists.newArrayList(
            		mention.get(CharacterOffsetBeginAnnotation.class).intValue(),
            		mention.get(CharacterOffsetEndAnnotation.class).intValue()));
            m.put("sentence", mention.get(SentenceIndexAnnotation.class).intValue());
            String entityType = mention.get(EntityTypeAnnotation.class);
            m.put("type", entityType);
            if (mention.containsKey(NormalizedNamedEntityTagAnnotation.class)) {
            	m.put("normalized", mention.get(NormalizedNamedEntityTagAnnotation.class));
            }
            if (mention.containsKey(TimexAnnotation.class)) {
            	m.put("timex_xml", mention.get(TimexAnnotation.class).toString());
            }
            jsonMentions.add(m);
        }
        sent_info.put("entitymentions", jsonMentions);
	}