Java Code Examples for edu.stanford.nlp.ling.CoreLabel#get()

The following examples show how to use edu.stanford.nlp.ling.CoreLabel#get(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Postprocess.java    From phrases with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a CoreNLP sentiment pipeline over the sentences of every pattern.
 * For each sentence the predicted sentiment class and per-token lemmas are
 * computed (currently unused — NOTE(review): the method always returns null
 * and discards the computed values; presumably a work in progress).
 *
 * @param patterns the patterns whose sentences are annotated
 * @return always {@code null} in the current implementation
 */
public List<Pattern> run(List<Pattern> patterns) {

        Properties properties = new Properties();
        properties.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);

        for (Pattern pattern : patterns) {
            Annotation annotated = pipeline.process(pattern.toSentences());
            for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
                // Sentiment is predicted from the sentence's annotated parse tree.
                Tree sentimentTree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
                int sentiment = RNNCoreAnnotations.getPredictedClass(sentimentTree);
                for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                    String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
                }
            }
        }
        return null;
    }
 
Example 2
Source File: Chapter4.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Demonstrates NER with a pre-trained CRF classifier: joins the sample
 * sentences into one string, classifies it, and prints every token that
 * belongs to a named entity together with its category.
 */
private static void usingStanfordNER() {
        String model = getModelDir() + "\\english.conll.4class.distsim.crf.ser.gz";
        CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(model);

        // StringBuilder avoids the O(n^2) cost of String += inside a loop.
        StringBuilder sentence = new StringBuilder();
        for (String element : sentences) {
            sentence.append(element);
        }

        List<List<CoreLabel>> entityList = classifier.classify(sentence.toString());

        for (List<CoreLabel> internalList : entityList) {
            for (CoreLabel coreLabel : internalList) {
                String word = coreLabel.word();
                String category = coreLabel.get(CoreAnnotations.AnswerAnnotation.class);
                // "O" marks a token outside any named entity; print only real entities.
                if (!"O".equals(category)) {
                    System.out.println(word + ":" + category);
                }
            }
        }
    }
 
Example 3
Source File: ItalianReadability.java    From tint with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Registers a {@link DescriptionForm} for a glossary form found in the text.
 * The form spans {@code numberOfTokens} tokens starting at character offset
 * {@code start}; the resulting description is keyed by its begin offset.
 *
 * @param form           the surface form to look up in the glossary
 * @param indexes        map from character offset to token index
 * @param start          character offset where the form starts
 * @param numberOfTokens number of tokens covered by the form
 * @param forms          output map, keyed by begin character offset
 * @param annotation     the annotated document providing the tokens
 * @param glossario      the glossary to consult
 */
static public void addDescriptionForm(String form, HashMap<Integer, Integer> indexes, int start,
        int numberOfTokens, TreeMap<Integer, DescriptionForm> forms, Annotation annotation,
        HashMap<String, GlossarioEntry> glossario) {
    Integer lemmaIndex = indexes.get(start);
    if (lemmaIndex == null) {
        return;
    }

    // Look up the glossary entry first: if the form is unknown there is no
    // need to fetch tokens and offsets at all (the original did this work
    // before discovering the entry was missing).
    GlossarioEntry glossarioEntry = glossario.get(form);
    if (glossarioEntry == null) {
        return;
    }

    // Character span of the form: from the first token's begin offset to the
    // last token's end offset.
    CoreLabel firstToken = annotation.get(CoreAnnotations.TokensAnnotation.class).get(lemmaIndex);
    CoreLabel endToken = annotation.get(CoreAnnotations.TokensAnnotation.class)
            .get(lemmaIndex + numberOfTokens - 1);
    Integer beginOffset = firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    Integer endOffset = endToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

    DescriptionForm descriptionForm = new DescriptionForm(
            beginOffset, endOffset, glossarioEntry);

    forms.put(beginOffset, descriptionForm);
}
 
Example 4
Source File: ItalianReadability.java    From tint with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Assigns a difficulty level (1 = easiest … 4 = hardest) to a content word,
 * based on whether its lemma appears in the model's easy-word lists for its
 * generic part of speech, and updates the per-level counters.
 *
 * @param token the content-word token to classify
 */
@Override public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    HashMap<Integer, HashMultimap<String, String>> easyWords = model.getEasyWords();
    String genericPos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
    String tokenLemma = token.get(CoreAnnotations.LemmaAnnotation.class);

    // Hardest level by default; the checks below may lower it.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    // Checked from level 3 down to 1 so that the easiest matching level wins.
    if (easyWords.get(3).get(genericPos).contains(tokenLemma)) {
        level3WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
    }
    if (easyWords.get(2).get(genericPos).contains(tokenLemma)) {
        level2WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
    }
    if (easyWords.get(1).get(genericPos).contains(tokenLemma)) {
        level1WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
    }
}
 
Example 5
Source File: NerWithDepartmentTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates a document with the shared pipeline and collects the NER tag of
 * every token, printing each word with its tag along the way.
 *
 * @param doc the raw text to annotate
 * @return the NER tags of all tokens, in document order
 */
public static List<String> extractNER(String doc){
    Annotation document = new Annotation(doc);
    pipeline.annotate(document);

    List<String> nerTags = new ArrayList<String>();
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        // A CoreLabel is a CoreMap with additional token-specific accessors.
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String word = token.get(CoreAnnotations.TextAnnotation.class);
            String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            nerTags.add(ne);
            System.out.println(word + "\t" + ne);
        }
    }
    return nerTags;
}
 
Example 6
Source File: CoreNlpToken.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Advances to the next CoreNLP token and copies its text, part of speech and
 * character offsets into this (reused) token instance.
 *
 * @return this instance, updated with the next token's data
 */
@Override
public CoreNlpToken next() {
    CoreLabel current = iterator.next();
    // Text and POS come from the CoreNLP annotation map; the character
    // offsets come from the label's own position accessors.
    text = current.get(CoreAnnotations.TextAnnotation.class);
    nature = current.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    begin = current.beginPosition();
    end = current.endPosition();
    return this;
}
 
Example 7
Source File: StopwordAnnotatorTest.java    From coreNlp with Apache License 2.0 5 votes vote down vote up
/**
 * Test to validate that stopwords are properly annotated in the token list.
 * @throws Exception if the pipeline fails to annotate the document
 */
@org.junit.Test
public void testLuceneStopwordList() throws Exception {
    Properties props = new Properties();
    // setProperty is used consistently (the original mixed put and
    // setProperty; for String keys they behave the same, but setProperty is
    // the type-safe Properties API).
    props.setProperty("annotators", "tokenize, ssplit, stopword");
    props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);

    //get the standard lucene stopword set
    Set<?> stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    for (CoreLabel token : tokens) {

        //get the stopword annotation: first = is-stopword, second = lemma-is-stopword
        Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);

        String word = token.word().toLowerCase();
        if (stopWords.contains(word)) {
            assertTrue(stopword.first());
        }
        else {
            assertFalse(stopword.first());
        }

        //not checking lemma, so always false
        assertFalse(stopword.second());
    }
}
 
Example 8
Source File: Chapter8.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 5 votes vote down vote up
/**
 * Demonstrates annotating several documents in one call via
 * {@code pipeline.annotate(Iterable)}, which can process them in parallel,
 * then prints the word/POS pairs of the second document.
 */
private static void usingStanfordPipelineParallel() {
    Properties props = new Properties();
    // setProperty throughout (the original mixed put and setProperty).
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    String path = "C:\\Current Books\\NLP and Java\\Downloads\\stanford-ner-2014-10-26\\classifiers";
    props.setProperty("ner.model", path + "/english.muc.7class.distsim.crf.ser.gz");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation1 = new Annotation("The robber took the cash and ran.");
    Annotation annotation2 = new Annotation("The policeman chased him down the street.");
    Annotation annotation3 = new Annotation("A passerby, watching the action, tripped the thief as he passed by.");
    Annotation annotation4 = new Annotation("They all lived happily everafter, except for the thief of course.");
    // Diamond operator fixes the raw-type warning from `new ArrayList()`;
    // the variable is typed as the List interface.
    List<Annotation> list = new ArrayList<>();
    list.add(annotation1);
    list.add(annotation2);
    list.add(annotation3);
    list.add(annotation4);
    Iterable<Annotation> iterable = list;

    pipeline.annotate(iterable);

    System.out.println("Total time: " + pipeline.timingInformation());
    List<CoreMap> sentences = annotation2.get(SentencesAnnotation.class);

    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.println("Word: " + word + " POS Tag: " + pos);
        }
    }
}
 
Example 9
Source File: CoreNlpTokenizer.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the next Lucene token from the CoreNLP token stream.
 * The term text is the lemma when available (surface text otherwise), the
 * type is the NER tag when meaningful (POS tag otherwise), and the position
 * increment accounts for tokens that were skipped.
 *
 * @return true if a token was produced, false when the input is exhausted
 */
@Override
public boolean incrementToken() {
    clearAttributes();
    // Advance to a sentence that still has tokens left.
    while (tokens == null || !tokens.hasNext()) {
        if (!getNextSentence()) {
            return false;
        }
    }
    CoreLabel current = tokens.next();
    // Prefer the lemma; fall back to the surface form when no lemmatization happened.
    String term = current.get(LemmaAnnotation.class);
    if (term == null) {
        term = current.get(TextAnnotation.class);
    }
    termAttribute.setLength(0);
    termAttribute.append(term);
    // Token type: NER tag when present and not "O", otherwise the POS tag.
    String tag = current.get(NamedEntityTagAnnotation.class);
    if (tag == null || "O".equals(tag)) {
        tag = current.get(PartOfSpeechAnnotation.class);
    }
    typeAttribute.setType(tag != null ? tag : TypeAttribute.DEFAULT_TYPE);
    // Character offsets into the original text.
    int startOffset = current.get(CharacterOffsetBeginAnnotation.class).intValue();
    int endOffset = current.get(CharacterOffsetEndAnnotation.class).intValue();
    offsetAttribute.setOffset(startOffset, endOffset);
    // Position increment reflects tokens skipped since the last emitted one.
    positionAttribute.setPositionIncrement(1 + skippedTokens);
    skippedTokens = 0;
    return true;
}
 
Example 10
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * POS-tags a sentence and wraps each token as a {@code Word} holding its
 * base form, surface form, POS tag and 1-based position.
 *
 * @param sentence the sentence to tag
 * @return the tagged words, in sentence order
 */
public Word[] getTaggedWords (String sentence) {
	CoreMap taggedSentence = getPOS(sentence);
	// Fetch the token list once instead of reading the annotation map twice.
	java.util.List<CoreLabel> tokens = taggedSentence.get(TokensAnnotation.class);
	Word[] ret = new Word[tokens.size()];
	int count = 0;
	for (CoreLabel token : tokens) {
		// this is the text of the token
		String word = token.get(TextAnnotation.class);
		// this is the POS tag of the token
		String pos = token.get(PartOfSpeechAnnotation.class);
		// Word positions are 1-based, hence count + 1.
		ret[count] = new Word(getBaseFormOfPattern(word.toLowerCase()), word, pos, count + 1);
		count ++;
	}
	return ret;
}
 
Example 11
Source File: CoreNlpExample.java    From core-nlp-example with MIT License 5 votes vote down vote up
public static void main(String[] args) {

        // Build a pipeline with POS tagging, lemmatization, NER, parsing and
        // coreference resolution.
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        // Sample input text.
        String text = "What is the Weather in Bangalore right now?";

        // Wrap the text in an Annotation and run all annotators over it.
        Annotation document = new Annotation(text);
        pipeline.annotate(document);

        // Walk every token of every sentence and print its text, POS tag and
        // NER label.
        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String word = token.get(CoreAnnotations.TextAnnotation.class);
                String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);

                System.out.println(String.format("Print: word: [%s] pos: [%s] ne: [%s]", word, pos, ne));
            }
        }
    }
 
Example 12
Source File: DigiCompMorphAnnotator.java    From tint with GNU General Public License v3.0 5 votes vote down vote up
/**
 * For every token, filters its morphological analyses down to those
 * compatible with the token's lemma (prefix {@code lemma+} or {@code lemma~})
 * and stores them as the MorphoComp annotation.
 *
 * @param annotation the document to annotate; ignored when it has no sentences
 */
@Override
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String[] morphFeatures = token.get(DigiMorphAnnotations.MorphoAnnotation.class).split(" ");
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            if (morphFeatures.length > 1) {
                // Several analyses: keep every one that matches the lemma
                // (possibly none — an empty list is still stored).
                List<String> comps = new ArrayList<>();
                for (String feature : morphFeatures) {
                    if (feature.startsWith(lemma + "+") || feature.startsWith(lemma + "~")) {
                        comps.add(feature);
                    }
                }
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class, comps);
            } else if (morphFeatures[0].startsWith(lemma + "+") || morphFeatures[0].startsWith(lemma + "~")) {
                // Single analysis: the annotation is only set when it matches.
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class,
                        new ArrayList<String>(Arrays.asList(morphFeatures[0])));
            }
        }
    }
}
 
Example 13
Source File: StopwordAnnotatorTest.java    From coreNlp with Apache License 2.0 5 votes vote down vote up
/**
 * Test to validate that the custom stopword list works.
 * @throws Exception if the pipeline fails to annotate the document
 */
@org.junit.Test
public void testCustomStopwordList() throws Exception {

    //setup coreNlp properties for stopwords. Note the custom stopword list property
    Properties props = new Properties();
    // setProperty is used consistently (the original mixed put and setProperty).
    props.setProperty("annotators", "tokenize, ssplit, stopword");
    props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");
    props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);

    //get the custom stopword set
    Set<?> stopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true);

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {

        //get the stopword annotation: first = is-stopword, second = lemma-is-stopword
        Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);

        String word = token.word().toLowerCase();
        if (stopWords.contains(word)) {
            assertTrue(stopword.first());
        }
        else {
            assertFalse(stopword.first());
        }

        //not checking lemma, so always false
        assertFalse(stopword.second());
    }
}
 
Example 14
Source File: ReplaceSubordinateRule.java    From tint with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Attempts to rewrite a subordinate clause by turning a subjunctive verb into
 * its indicative form via inverse morphology lookup.
 * NOTE(review): the method currently prints its intermediate results and
 * always returns null — presumably still under development; the commented-out
 * code below looks like debugging leftovers.
 *
 * @param annotation the annotated sentence(s); only the first sentence is used
 * @param children   dependency children per sentence/token (unused so far)
 * @return always {@code null} in the current implementation
 */
@Override public String apply(Annotation annotation, Map<Integer, HashMultimap<Integer, Integer>> children) {

        InverseDigiMorph dm = new InverseDigiMorph();

        // Index of the conjunction token (0-based; +1 below for the 1-based graph index).
        int conj = 0;
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        CoreMap sentence = sentences.get(0);

        //

        SemanticGraph semanticGraph = sentence
                .get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);

        // Walk up the dependency graph from the conjunction; give up if it has
        // no ancestors.
        IndexedWord node = semanticGraph.getNodeByIndex(conj + 1);
        List<IndexedWord> history = getHistory(semanticGraph, node);
        if (history.size() == 1) {
            return null;
        }
        // The immediate ancestor must be a verb (POS starting with "V").
        IndexedWord verb = history.get(1);
        CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(verb.index() - 1);
        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
        if (!pos.startsWith("V")) {
            return null;
        }

        // todo: check subject in parse tree
        // todo: check clitics

        // Parse the whitespace-separated morphological analyses; collect the
        // person/number readings of subjunctive ("cong") verb analyses and the
        // tense of the last one seen.
        String morpho = token.get(DigiMorphAnnotations.MorphoAnnotation.class);
        String[] parts = morpho.split("\\s+");
        TreeSet<String> persons = new TreeSet<>();
        String tempo = null;
        for (int i = 1; i < parts.length; i++) {
            String[] vParts = parts[i].split("\\+");
            if (!vParts[1].equals("v")) {
                continue;
            }

            String modo = vParts[2];
            if (!modo.equals("cong")) {
                continue;
            }

            tempo = vParts[3];
            persons.add(vParts[5] + "+" + vParts[6]);
        }

        // Ambiguous person: climb further up the graph to the next verb and
        // derive the person set from it instead.
        IndexedWord next = null;
        if (persons.size() != 1) {
            for (int i = 2; i < history.size(); i++) {
                if (history.get(i).get(CoreAnnotations.PartOfSpeechAnnotation.class).startsWith("V")) {
                    next = history.get(i);
                    break;
                }
            }
            persons = getPersons(semanticGraph, next, sentence);
        }

        // Build the inverse-morphology query: lemma + indicative + tense + person.
        StringBuffer stringBuffer = new StringBuffer();
        stringBuffer.append(token.lemma());
        stringBuffer.append("+v+indic+").append(tempo);
        stringBuffer.append("+nil+");

        // Add person
        stringBuffer.append(persons.last());

        String find = stringBuffer.toString();

        System.out.println(find);
        String inverseMorphology = dm.getInverseMorphology(find);

        System.out.println(inverseMorphology);
        System.out.println(morpho);
        System.out.println(tempo);
        System.out.println(persons);

//        System.out.println(annotation.get(UDPipeAnnotations.UDPipeOriginalAnnotation.class));
//        System.out.println(sentence.get(CoreAnnotations.TokensAnnotation.class).get(2)
//                .get(UDPipeAnnotations.FeaturesAnnotation.class));
//        System.out.println(token
//                .get(UDPipeAnnotations.FeaturesAnnotation.class));
//
//        System.out.println(children.get(0).get(verb.index()));
//        System.out.println(children);
//        System.out.println(verb.get(UDPipeAnnotations.FeaturesAnnotation.class));

//        try {
//            System.out.println(JSONOutputter.jsonPrint(annotation));
//        } catch (IOException e) {
//            e.printStackTrace();
//        }
//        System.out.println(getHistory(semanticGraph, node));
//        System.out.println(semanticGraph.getOutEdgesSorted(node));
//        System.out.println(semanticGraph.getIncomingEdgesSorted(node));
//        System.out.println(node);
        return null;
    }
 
Example 15
Source File: CoreNLPPosTagger.java    From Heracles with GNU General Public License v3.0 4 votes vote down vote up
/**
	 * Process the Dataset in chunks, as defined by the <code>spanType</code> parameter.
	 * The Spans denoted by spanType must each contain Words belonging to a single sentence.
	 * 
	 */
	@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit){
//		if (dataset.getPerformedNLPTasks().contains(getTask())){
//			Framework.error("This dataset has already been tagged with POS.");
//			return;
//		}
		//check if prerequisites are satisfied
		if (!dataset.getPerformedNLPTasks().containsAll(prerequisites)){
			HashSet<NLPTask> missingTasks = new HashSet<>();
			missingTasks.addAll(prerequisites);
			missingTasks.removeAll(dataset.getPerformedNLPTasks());
			Framework.error("This dataset does not meet the requirements to use this component! Missing tasks: " + missingTasks);
			return;
		}
		
		Properties prop1 = new Properties();
		prop1.setProperty("annotators", "pos");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
		for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){

			
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
			if (a == null){
				System.out.println(a);
			}
			pipeline.annotate(a);
			List<CoreMap> sentenceAnnotations = a.get(SentencesAnnotation.class);
			for (CoreMap sentence : sentenceAnnotations){
				for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
					
					Word w = wordIndex.get(token.get(CharacterOffsetBeginAnnotation.class));
					String tempPos = token.get(PartOfSpeechAnnotation.class);
					if (w.hasAnnotation("URI")){
						w.putAnnotation("pos", "NNP");
					} else {
						w.putAnnotation("pos", tempPos);
					}
//					System.out.println(w.getAnnotations());
				}
			

				
				
			}
		}		
	}
 
Example 16
Source File: CoreNLPToJSON.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Process an English text file: tokenize, POS-tag, NER-tag and chunk each
 * line (one sentence per line), then emit the result as a JSON
 * SourceDocument on stdout.
 *
 * @param args args[0] is the input file; args[1] (optional) is an InputProperties string
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.printf("Usage: java %s file [inputproperties_str] > json_output%n", CoreNLPToJSON.class.getName());
    System.exit(-1);
  }
  String textFile = args[0];
  InputProperties inputProperties = args.length > 1 ? InputProperties.fromString(args[1]) : new InputProperties();

  StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties);

  // Configure tokenizer
  EnglishPreprocessor preprocessor = new EnglishPreprocessor(true);

  // Use a map with ordered keys so that the output is ordered by segmentId.
  Map<Integer,SourceSegment> annotations = new TreeMap<Integer,SourceSegment>();
  int linesProcessed = 0;
  // try-with-resources: the reader is closed even when an exception aborts
  // the loop (the original leaked it on any RuntimeException below).
  try (LineNumberReader reader = IOTools.getReaderFromFile(textFile)) {
    for (String line; (line = reader.readLine()) != null;) {
      Annotation annotation = coreNLP.process(line);
      List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
      // Each input line must contain exactly one sentence.
      if (sentences.size() != 1) {
        throw new RuntimeException("Sentence splitting on line: " + String.valueOf(reader.getLineNumber()));
      }
      CoreMap sentence = sentences.get(0);
      Tree tree = sentence.get(TreeAnnotation.class);
      tree.indexLeaves();
      int[] chunkVector = getChunkVector(tree);
      List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
      int numTokens = tokens.size();
      // Re-tokenize with the phrasal preprocessor and require agreement with CoreNLP.
      SymmetricalWordAlignment alignment = preprocessor.processAndAlign(line);
      if (alignment.e().size() != numTokens) {
        throw new RuntimeException(String.format("Tokenizer configurations differ: %d/%d", alignment.e().size(), numTokens));
      }
      SourceSegment segment = new SourceSegment(numTokens);
      segment.layoutSpec.addAll(makeLayoutSpec(alignment));
      segment.inputProperties = inputProperties.toString();
      for (int j = 0; j < numTokens; ++j) {
        CoreLabel token = tokens.get(j);
        String word = token.get(TextAnnotation.class);
        segment.tokens.add(unescape(word));
        String pos = mapPOS(token.get(PartOfSpeechAnnotation.class));
        segment.pos.add(pos);
        String ne = token.get(NamedEntityTagAnnotation.class);
        segment.ner.add(ne);
        segment.chunkVector[j] = chunkVector[j];
      }
      // Segment ids are zero-based line numbers.
      annotations.put(reader.getLineNumber()-1, segment);
    }
    // Capture the count before the reader is closed.
    linesProcessed = reader.getLineNumber();
  }
  System.err.printf("Processed %d sentences%n", linesProcessed);

  final SourceDocument jsonDocument = new SourceDocument(textFile, annotations);

  // Convert to json
  Gson gson = new Gson();
  String json = gson.toJson(jsonDocument);
  System.out.println(json);
}
 
Example 17
Source File: Readability.java    From tint with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Registers one token with the readability statistics: counts it as a word
 * when its POS qualifies, computes its hyphenation (used as a syllable
 * estimate), and updates content-word / easy-word counters and POS tallies.
 *
 * @param token the token to record; several Readability annotations are set on it
 */
public void addWord(CoreLabel token) {
        // Defaults; subclasses may flip these in the adding* hooks below.
        token.set(ReadabilityAnnotations.ContentWord.class, false);
        token.set(ReadabilityAnnotations.LiteralWord.class, false);

        String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
//        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String word = token.word();

        addingToken(token);

        if (isWordPos(pos)) {
            addingWord(token);
            wordCount++;
            // Letters-only document length uses character offsets, not string length.
            docLenLettersOnly += token.endPosition() - token.beginPosition();

            // Hyphenation works on ASCII; strip diacritics first.
            word = flattenToAscii(word);
            Hyphenation hyphenation = hyphenator.hyphenate(word);

            boolean done = false;
            if (hyphenation != null) {
                try {
                    String h = hyphenation.toString();
                    // hyphenation.length() counts hyphen points; +1 gives syllables.
                    incrementHyphenCount(hyphenation.length() + 1);
                    token.set(ReadabilityAnnotations.HyphenationAnnotation.class, h);
                    done = true;
                    hyphenWordCount++;
                } catch (Exception e) {
                    // ignored
                }
            }

            // Short words that failed hyphenation are assumed monosyllabic.
            if (!done && word.length() < 5) {
                incrementHyphenCount(1);
                hyphenWordCount++;
            }

            if (isContentPos(pos)) {
                contentWordSize++;
                addingContentWord(token);
            }
            if (isEasyPos(pos)) {
                contentEasyWordSize++;
                addingEasyWord(token);
            }
        }
        // Fall back to the original text so the annotation is always present.
        if (token.get(ReadabilityAnnotations.HyphenationAnnotation.class) == null) {
            token.set(ReadabilityAnnotations.HyphenationAnnotation.class, token.originalText());
        }

        String genericPos = getGenericPos(pos);
        posStats.add(pos);
        genericPosStats.add(genericPos);
    }
 
Example 18
Source File: POSExample.java    From core-nlp-example with MIT License 4 votes vote down vote up
public static void main(String[] args) {

        StanfordCoreNLP stanfordCoreNLP = Pipeline.getPipeline();

        String text = "Hey! I am Dinesh Krishnan.";

        // Wrap the raw text and run the shared pipeline over it.
        CoreDocument coreDocument = new CoreDocument(text);
        stanfordCoreNLP.annotate(coreDocument);

        // Print every token with its part-of-speech tag.
        for (CoreLabel coreLabel : coreDocument.tokens()) {
            String pos = coreLabel.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            System.out.println(coreLabel.originalText() + " = "+ pos);
        }
    }
 
Example 19
Source File: CorenlpPipeline.java    From datashare with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Process with entire pipelines
 *
 * @param input    the string to annotate
 * @param hash     the input hash code
 * @param language the input language
 * @return the sentence/token/POS/NER annotations extracted from the input
 */
private Annotations processPipeline(String input, String hash, Language language) throws InterruptedException {
    Annotations annotations = new Annotations(hash, getType(), language);

    // CoreNLP annotations data-structure
    edu.stanford.nlp.pipeline.Annotation coreNlpAnnotation = new edu.stanford.nlp.pipeline.Annotation(input);

    LOGGER.info("sentencing ~ tokenizing ~ POS-tagging ~ name-finding for " + language.toString());

    // Sentencize input
    // Tokenize
    // Pos-tag
    // NER
    CoreNlpPipelineModels.getInstance().get(language).annotate(coreNlpAnnotation);
    // Feed annotations
    List<CoreMap> sentences = coreNlpAnnotation.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        int sentenceBegin = sentence.get(CharacterOffsetBeginAnnotation.class);
        int sentenceEnd = sentence.get(CharacterOffsetEndAnnotation.class);
        annotations.add(SENTENCE, sentenceBegin, sentenceEnd);

        int nerBegin = 0;
        NamedEntity.Category prevCat = NamedEntity.Category.NONE;

        List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            int tokenBegin = token.get(CharacterOffsetBeginAnnotation.class);
            int tokenEnd = token.get(CharacterOffsetEndAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class); // for now we don't use POS tagging
            annotations.add(TOKEN, tokenBegin, tokenEnd);
            annotations.add(POS, tokenBegin, tokenEnd);

            // Track contiguous runs of the same NER category; a run is emitted
            // when a token outside any entity follows it.
            String cat = token.get(NamedEntityTagAnnotation.class);
            NamedEntity.Category currCat = NamedEntity.Category.parse(cat);
            if (currCat != NamedEntity.Category.NONE) {
                if (prevCat != currCat) {
                    nerBegin = tokenBegin;
                }
            } else {
                if (prevCat != currCat) {
                    annotations.add(NER, nerBegin, tokenBegin, prevCat);
                }
            }
            prevCat = currCat;
        }
        // Bug fix: an entity that extends to the last token of the sentence
        // was never emitted, because the loop only flushed a run when a
        // non-entity token followed it. Flush any still-open run here.
        if (prevCat != NamedEntity.Category.NONE) {
            annotations.add(NER, nerBegin, sentenceEnd, prevCat);
        }
    }
    return annotations;
}
 
Example 20
Source File: NERExample.java    From core-nlp-example with MIT License 3 votes vote down vote up
/**
 * Demonstrates NER with CoreDocument: annotates a sample text and prints each
 * token with its named-entity tag.
 */
public static void main(String[] args)
    {
        StanfordCoreNLP stanfordCoreNLP = Pipeline.getPipeline();

        String text = "Hey! My  name is  Krishnan and I have friend his name is Robert." +
                " We both are living in Berlin";

        CoreDocument coreDocument = new CoreDocument(text);

        stanfordCoreNLP.annotate(coreDocument);

        List<CoreLabel> coreLabels = coreDocument.tokens();

        for(CoreLabel coreLabel : coreLabels) {

            String ner = coreLabel.get(CoreAnnotations.NamedEntityTagAnnotation.class);

            System.out.println(coreLabel.originalText() + " = "+ ner);
        }
        // (Removed a commented-out, dead code block that filtered tokens by
        // the "Person" NER tag — delete commented-out code rather than keep it.)
}