edu.stanford.nlp.pipeline.Annotation Java Examples

The following examples show how to use edu.stanford.nlp.pipeline.Annotation. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConstituentExample.java    From blog-codes with Apache License 2.0 7 votes vote down vote up
/**
 * Parses one hard-coded sentence with a CoreNLP pipeline, prints its parse tree
 * to stdout, and prints every constituent labelled "VP" or "NP" (with the leaves
 * it covers) to stderr.
 *
 * @param args unused
 */
public static void main(String[] args) {
	// pipeline configuration
	Properties config = new Properties();
	config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
	// the shift-reduce parser could be selected instead by setting "parse.model" to
	// edu/stanford/nlp/models/srparser/englishSR.ser.gz
	config.setProperty("parse.maxlen", "100");
	StanfordCoreNLP nlp = new StanfordCoreNLP(config);

	// annotate the example sentence
	Annotation doc = new Annotation("The small red car turned very quickly around the corner.");
	nlp.annotate(doc);

	// tree of the first (and only) sentence
	Tree parseTree = doc.get(CoreAnnotations.SentencesAnnotation.class)
			.get(0)
			.get(TreeCoreAnnotations.TreeAnnotation.class);
	System.out.println(parseTree);

	for (Constituent span : parseTree.constituents(new LabeledScoredConstituentFactory())) {
		if (span.label() == null) {
			continue;
		}
		String category = span.label().toString();
		if (!category.equals("VP") && !category.equals("NP")) {
			continue;
		}
		System.err.println("found constituent: " + span.toString());
		// subList's upper bound is exclusive, hence end() + 1
		System.err.println(parseTree.getLeaves().subList(span.start(), span.end() + 1));
	}
}
 
Example #2
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Annotates {@code s} with {@code pipeline_lemma} and returns the basic
 * dependency graph of the first sentence.
 *
 * @param s the text to analyze
 * @return the {@code BasicDependenciesAnnotation} value of the first sentence, or
 *         {@code null} when the annotated document has no sentences
 */
public SemanticGraph getBasicDependencies (String s) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(s);

    // run all Annotators on this text
    pipeline_lemma.annotate(document);

    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // Only the first sentence is used; guard against a missing or empty sentence list
    // instead of relying on a loop that returns on its first iteration.
    if (sentences == null || sentences.isEmpty()) {
        return null;
    }
    return sentences.get(0).get(BasicDependenciesAnnotation.class);
}
 
Example #3
Source File: JavaClient.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a short Chinese sentence through a {@code MultiLangsStanfordCoreNLPClient}
 * (constructed with host "http://localhost" and port 9000) and prints, for every token
 * of the first sentence, the word with its begin and end positions, tab-separated.
 *
 * @param args unused
 */
public static void main(String[] args) {
	// annotators requested for this run
	Properties clientProps = new Properties();
	clientProps.setProperty("annotators", "tokenize,ssplit,pos,ner,depparse,openie");
	MultiLangsStanfordCoreNLPClient client =
			new MultiLangsStanfordCoreNLPClient(clientProps, "http://localhost", 9000, 2, null, null, "zh");

	// wrap the input text and annotate it
	String input = "今天天气很好。";
	Annotation doc = new Annotation(input);
	client.annotate(doc);

	// print token text and character offset info for the first sentence
	CoreMap first = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
	for (CoreLabel tok : first.get(CoreAnnotations.TokensAnnotation.class)) {
		String line = tok.word() + "\t" + tok.beginPosition() + "\t" + tok.endPosition();
		System.out.println(line);
	}
}
 
Example #4
Source File: SentimentAnalyzer.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code pipeline} over {@code text} and packages the sentiment predictions.
 * Every sentence overwrites the values stored for the previous one, so the result
 * reflects the last sentence of the text.
 *
 * @param text the text to classify; may be {@code null} or empty
 * @return a new {@code SentimentResult}; no setters are called on it when
 *         {@code text} is {@code null} or empty
 */
public SentimentResult getSentimentResult(String text) {
	SentimentClassification scores = new SentimentClassification();
	SentimentResult result = new SentimentResult();
	if (text == null || text.length() == 0) {
		return result;
	}

	Annotation annotated = pipeline.process(text);
	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
		SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(sentimentTree);

		// each prediction entry is scaled by 100 and rounded to a whole number
		scores.setVeryNegative((double) Math.round(predictions.get(0) * 100d));
		scores.setNegative((double) Math.round(predictions.get(1) * 100d));
		scores.setNeutral((double) Math.round(predictions.get(2) * 100d));
		scores.setPositive((double) Math.round(predictions.get(3) * 100d));
		scores.setVeryPositive((double) Math.round(predictions.get(4) * 100d));

		String sentimentType = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
		result.setSentimentType(sentimentType);
		result.setSentimentClass(scores);
		result.setSentimentScore(RNNCoreAnnotations.getPredictedClass(sentimentTree));
	}
	return result;
}
 
Example #5
Source File: StanfordCoreNLPTest.java    From java_in_examples with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a short excerpt and prints, for each sentence, its sentiment class
 * followed by a tab and the sentence itself.
 *
 * @param s unused
 */
public static void main(String[] s) {
    Properties props = new Properties();
    // The loop below reads SentimentCoreAnnotations.SentimentClass, which is produced by the
    // "sentiment" annotator; without it in the list every sentence printed "null".
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
    // The sentiment annotator works on binarized parse trees, so ask the parser to keep them.
    props.setProperty("parse.binaryTrees", "true");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" +
            "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" +
            "\"How do you know I am mad?\" said Alice.\n" +
            "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        System.out.println(sentiment + "\t" + sentence);
    }
}
 
Example #6
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Tags {@code theSentence} with a CoreNLP pipeline (tokenize, ssplit, pos) and prints
 * each sentence as space-separated {@code word/POS} pairs, followed by the XML and
 * pretty-printed renderings of the annotated document.
 */
private static void usingStanfordPOSTagger() {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos");
    props.setProperty("pos.model", "C:\\Current Books in Progress\\NLP and Java\\Models\\english-caseless-left3words-distsim.tagger");
    // Must be stored as a String: Properties.getProperty returns null for non-String
    // values, so an Integer inserted through put() is invisible to property lookups.
    props.setProperty("pos.maxlen", "10");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(theSentence);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.print(word + "/" + pos + " ");
        }
        System.out.println();

        // NOTE(review): this prints the whole document once per sentence; kept as in the
        // original, but it probably belongs after the loop - confirm intended output.
        try {
            pipeline.xmlPrint(document, System.out);
            pipeline.prettyPrint(document, System.out);
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
 
Example #7
Source File: StanfordTokenizer.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the CoreNLP pipeline registered for the document language over the CAS text
 * and adds one {@code Sentence} and one {@code Token} annotation to the CAS indexes
 * for every sentence and token found, using their character offsets.
 *
 * @param aJCas the CAS holding the document text and language
 * @throws AnalysisEngineProcessException wrapping a {@code LanguageNotSupportedException}
 *         when {@code languageMap} has no entry for the document language
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  String documentText = aJCas.getDocumentText();
  Annotation annotated = new Annotation(documentText);

  if (!languageMap.containsKey(aJCas.getDocumentLanguage())) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }

  // languageMap yields the index of the pipeline for this language
  StanfordCoreNLP languagePipeline = stanfordCoreNLPs[languageMap.get(aJCas.getDocumentLanguage())];
  languagePipeline.annotate(annotated);

  for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
    int sentenceBegin = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int sentenceEnd = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    Sentence casSentence = new Sentence(aJCas, sentenceBegin, sentenceEnd);
    casSentence.addToIndexes();

    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      Token casToken = new Token(
          aJCas,
          token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
          token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
      casToken.addToIndexes();
    }
  }
}
 
Example #8
Source File: StanfordRNNDParser.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the CAS to a CoreNLP annotation, predicts a dependency structure for each
 * sentence, stores the converted graph as the sentence's enhanced dependencies, and
 * finally writes the dependencies back to the CAS. The graph and its sorted edges are
 * also printed for each sentence.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override public void process(JCas jCas) throws AnalysisEngineProcessException {
  mappingProvider.configure(jCas.getCas());
  DKPro2CoreNlp casConverter = new DKPro2CoreNlp();
  Annotation document = casConverter.convert(jCas, new Annotation());

  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    GrammaticalStructure structure = parser.predict(sentence);
    SemanticGraph predictedGraph = SemanticGraphFactory.makeFromTree(
        structure, SemanticGraphFactory.Mode.CCPROCESSED, GrammaticalStructure.Extras.MAXIMAL, null);
    predictedGraph.prettyPrint();

    SemanticGraph convertedGraph = semanticGraphUniversalEnglishToEnglish(predictedGraph);
    sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, convertedGraph);
    for (SemanticGraphEdge edge : convertedGraph.edgeListSorted()) {
      System.out.println(edge);
    }
  }
  convertDependencies(jCas, document, true);
}
 
Example #9
Source File: CorefExample.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Runs coreference resolution over a three-sentence example and prints every
 * coreference chain of the document, then the mentions of each sentence.
 *
 * @param args unused
 * @throws Exception declared for anything the pipeline may raise
 */
public static void main(String[] args) throws Exception {
	Annotation doc = new Annotation(
			"Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");

	Properties config = new Properties();
	config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
	StanfordCoreNLP nlp = new StanfordCoreNLP(config);
	nlp.annotate(doc);

	// document-level coreference chains
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain chain : doc.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}

	// sentence-level mentions
	for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example #10
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Annotates {@code s} with {@code pipeline_lemma} and returns its first sentence.
 *
 * <p>How to use the result:
 * <pre>
 * for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
 *     // this is the text of the token
 *     String word = token.get(TextAnnotation.class);
 *     // this is the POS tag of the token
 *     String pos = token.get(PartOfSpeechAnnotation.class);
 * }
 * </pre>
 *
 * @param s the text to annotate
 * @return the first annotated sentence, or {@code null} when the annotated
 *         document has no sentences
 */
public CoreMap getPOS (String s) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(s);

    // run all Annotators on this text
    pipeline_lemma.annotate(document);

    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // Only the first sentence is returned; guard against a missing or empty sentence
    // list instead of relying on a loop that returns on its first iteration.
    if (sentences == null || sentences.isEmpty()) {
        return null;
    }
    return sentences.get(0);
}
 
Example #11
Source File: ReconTool.java    From Criteria2Query with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether the tokenized {@code text} contains one of the tokens
 * "and", "or", "," or "/".
 *
 * @param text the text to inspect
 * @return {@code true} as soon as one of those tokens is found, {@code false} otherwise
 */
public boolean isCEE(String text){
	// pad slashes with spaces (presumably so "/" is tokenized on its own - confirm)
	text = text.replace("/", " / ");
	Annotation annotation = new Annotation(text);
	pipeline.annotate(annotation);
	List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
	for (CoreMap sentence : sentences) {
		for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
			String word = token.get(TextAnnotation.class);
			if (word.equals("and") || word.equals(",") || word.equals("/") || word.equals("or")) {
				// the first hit decides the answer; no need to scan the remaining sentences
				return true;
			}
		}
	}
	return false;
}
 
Example #12
Source File: CorefTool.java    From Criteria2Query with Apache License 2.0 6 votes vote down vote up
/**
 * Runs coreference resolution over a hard-coded eligibility-criteria paragraph and
 * prints every coreference chain of the document, then the mentions of each sentence.
 */
public void extractCoref() {
	String criteria="Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
	Annotation doc = new Annotation(criteria);

	Properties config = new Properties();
	config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
	StanfordCoreNLP nlp = new StanfordCoreNLP(config);
	nlp.annotate(doc);

	// document-level coreference chains
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain chain : doc.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}

	// sentence-level mentions
	for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example #13
Source File: CoreNLPCache.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Loads serialized CoreNLP annotations from a file and rebuilds {@code annotationMap},
 * keyed by each sentence's line number minus one.
 *
 * @param filename path of the serialized {@code Annotation}
 * @return the largest line number seen plus one (1 when there are no sentences)
 * @throws RuntimeException if the deserialized annotation carries no sentence list
 */
public static int loadSerialized(String filename) {
  Annotation document = IOTools.deserialize(filename, Annotation.class);
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null) {
    throw new RuntimeException("Unusable annotation (no sentences) in " + filename);
  }

  annotationMap = new HashMap<Integer,CoreMap>(sentences.size());
  int highestLineId = 0;
  for (CoreMap sentence : sentences) {
    // line numbers are 1-indexed, map keys are 0-indexed
    int lineId = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
    highestLineId = Math.max(highestLineId, lineId);
    annotationMap.put(lineId - 1, sentence);
  }
  return highestLineId + 1;
}
 
Example #14
Source File: ComparisonUtils.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Collects the words of {@code question} whose part-of-speech tag starts with {@code tag},
 * e.g. JJR for comparatives and JJS for superlatives.
 *
 * @param question text to retrieve words from
 * @param tag POS tag prefix to match
 * @return the matching words in document order, or {@code null} when either argument is {@code null}
 */
private ArrayList<String> getWords(String question, String tag) {
	if (question == null || tag == null) {
		return null;
	}

	Annotation annotated = new Annotation(question);
	PIPELINE.annotate(annotated);

	ArrayList<String> matches = new ArrayList<String>();
	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
			if (!token.tag().startsWith(tag)) {
				continue;
			}
			// keep everything before the last '-' of the token's string form
			// (presumably "word-index" - confirm against CoreLabel.toString())
			String rendered = token.toString();
			matches.add(rendered.substring(0, rendered.lastIndexOf("-")));
		}
	}
	return matches;
}
 
Example #15
Source File: NumberOfToken.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Builds two-word phrases from the "compound" and "amod" dependencies of the question:
 * the dependent followed by its governor, separated by a space.
 *
 * @param q a question
 * @return the phrases in the order the dependencies are reported
 */
private ArrayList<String> getNounPhrases(String q) {
	ArrayList<String> phrases = new ArrayList<String>();

	Annotation annotated = new Annotation(q);
	PIPELINE.annotate(annotated);

	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		SemanticGraph graph = sentence.get(BasicDependenciesAnnotation.class);
		for (TypedDependency dependency : graph.typedDependencies()) {
			String relation = dependency.reln().toString();
			if (!relation.equals("compound") && !relation.equals("amod")) {
				continue;
			}
			// each side is cut at its last '/' (presumably "word/TAG" - confirm)
			String dependent = dependency.dep().toString();
			String governor = dependency.gov().toString();
			String dependentWord = dependent.substring(0, dependent.lastIndexOf("/"));
			String governorWord = governor.substring(0, governor.lastIndexOf("/"));
			phrases.add(dependentWord + " " + governorWord);
		}
	}
	return phrases;
}
 
Example #16
Source File: Postprocess.java    From phrases with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a sentiment pipeline over the sentences of every pattern.
 *
 * <p>NOTE(review): the predicted sentiment class and the token lemmas are computed
 * but never stored, and the method always returns {@code null}; it looks unfinished.
 *
 * @param patterns the patterns to process
 * @return always {@code null}
 */
public List<Pattern> run(List<Pattern> patterns) {
    Properties config = new Properties();
    config.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
    StanfordCoreNLP nlp = new StanfordCoreNLP(config);

    for (Pattern pattern : patterns) {
        Annotation annotated = nlp.process(pattern.toSentences());
        for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree sentimentTree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
            // result currently unused
            int predictedClass = RNNCoreAnnotations.getPredictedClass(sentimentTree);
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                // result currently unused
                String tokenLemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            }
        }
    }
    return null;
}
 
Example #17
Source File: RelationExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code sentence} with {@code pipeline} and {@code r}, prints every
 * "Work_For" relation mention found, and maps the "PEOPLE" argument of each such
 * relation to its "ORGANIZATION" argument. An argument that is absent is recorded
 * as the empty string.
 *
 * @param sentence the text to analyze
 * @return person-to-organization pairs; later relations overwrite earlier ones with the same person
 */
public static HashMap<String, String> extract(String sentence) {
    Annotation document = new Annotation(sentence);
    pipeline.annotate(document);
    r.annotate(document);

    HashMap<String, String> employerByPerson = new HashMap<String, String>();
    for (CoreMap annotatedSentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<RelationMention> relations =
          annotatedSentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      for (RelationMention relation : relations) {
        if (!relation.getType().equals("Work_For")) {
          continue;
        }
        System.out.println(relation);

        String organization = "";
        String people = "";
        for (EntityMention argument : relation.getEntityMentionArgs()) {
          if (argument.getType().equals("ORGANIZATION")) {
            organization = argument.getValue();
          }
          if (argument.getType().equals("PEOPLE")) {
            people = argument.getValue();
          }
        }
        employerByPerson.put(people, organization);
      }
    }
    return employerByPerson;
}
 
Example #18
Source File: IntelKBPModel.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Extracts KBP relation triples about titles and employment from {@code doc}.
 * Before annotation, non-breaking spaces (U+00A0) are replaced by plain spaces and
 * the characters U+200B, U+200C, U+200D and U+FEFF are removed.
 *
 * @param doc the text to analyze
 * @return every triple whose trimmed relation gloss is per:title, per:employee_of,
 *         org:top_members/employees or per:former_title, mapped to the string form
 *         of the sentence it was found in
 */
public static HashMap<RelationTriple, String> extract(String doc) {
    String cleaned = doc
            .replaceAll("\u00a0", " ")
            .replaceAll("\u200B|\u200C|\u200D|\uFEFF", "");
    Annotation ann = new Annotation(cleaned);
    pipeline.annotate(ann);

    HashMap<RelationTriple, String> relations = new HashMap<RelationTriple, String>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (RelationTriple triple : sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
            String relation = triple.relationGloss().trim();
            boolean wanted = relation.equals("per:title")
                    || relation.equals("per:employee_of")
                    || relation.equals("org:top_members/employees")
                    || relation.equals("per:former_title");
            if (wanted) {
                relations.put(triple, sentence.toString());
            }
        }
    }
    return relations;
}
 
Example #19
Source File: RegexNerTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code doc} and returns the named-entity tag of every token, in document order.
 *
 * @param doc the text to analyze
 * @return one {@code NamedEntityTagAnnotation} value per token
 */
public static List<String> extractNER(String doc){
    Annotation document = new Annotation(doc);

    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    List<String> result = new ArrayList<String>();
    for(CoreMap sentence: sentences) {
        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token: sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            // only the NER label is needed; the token text and POS tag were fetched
            // and discarded before, so those lookups are gone
            result.add(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
        }
    }
    return result;
}
 
Example #20
Source File: KBPTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Debugging entry point for trying relation extraction from the console: every line
 * entered at the "sentence> " prompt is annotated, its KBP triples are printed to
 * stderr and each sentence to stdout.
 *
 * @param args command-line arguments, converted to pipeline properties
 * @throws IOException declared for console I/O failures
 */
public static void main(String[] args) throws IOException {
  Properties config = StringUtils.argsToProperties(args);
  config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  config.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP nlp = new StanfordCoreNLP(config);

  IOUtils.console("sentence> ", input -> {
    Annotation annotated = new Annotation(input);
    nlp.annotate(annotated);
    annotated.get(CoreAnnotations.SentencesAnnotation.class).forEach(sentence -> {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
      System.out.println(sentence);
    });
  });
}
 
Example #21
Source File: Trees.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Annotates {@code text} with {@code pipeline} and returns its sentences.
 *
 * <p>Parse trees and dependency graphs are not extracted here; callers read them
 * from the returned sentences (for example via {@code TreeAnnotation}). The lists
 * this method used to build for them were never used and have been removed.
 *
 * @param text the text to annotate
 * @return the value of {@code SentencesAnnotation} on the annotated document
 */
public static List<CoreMap> parse(String text) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    return document.get(SentencesAnnotation.class);
}
 
Example #22
Source File: StanfordCoref.java    From Graphene with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Rewrites {@code text} so that each coreferent mention is replaced with whatever
 * {@code getReplacement} returns for that mention and the representative mention
 * of its chain.
 *
 * @param text the text to resolve
 * @return the original text paired with the rewritten sentences joined by single spaces
 */
@Override
public CoreferenceContent doCoreferenceResolution(String text) {
	Annotation annotated = new Annotation(text);
	PIPELINE.annotate(annotated);

	// copy every sentence into an editable word sequence
	List<Sentence> rewritten = new ArrayList<>();
	for (CoreMap coreMap : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		Sentence words = new Sentence();
		for (CoreLabel label : coreMap.get(CoreAnnotations.TokensAnnotation.class)) {
			words.addWord(label.word());
		}
		rewritten.add(words);
	}

	// substitute mentions chain by chain; the -1 shifts the mention's sentence and token indices
	for (CorefChain chain : annotated.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		String representative = chain.getRepresentativeMention().mentionSpan;
		for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
			Sentence target = rewritten.get(mention.sentNum - 1);
			String replacement = getReplacement(mention.mentionSpan, representative);
			target.replaceWords(mention.startIndex - 1, mention.endIndex - 1, replacement);
		}
	}

	String resolved = rewritten.stream().map(s -> s.toString()).collect(Collectors.joining(" "));
	return new CoreferenceContent(text, resolved);
}
 
Example #23
Source File: Phrase.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs the constituency parse pipeline over the phrase's memoized annotation and
 * returns the parse tree of every sentence that has one.
 *
 * @param p the phrase to parse
 * @return the non-null {@code TreeAnnotation} values of the phrase's sentences
 */
private static List<Tree> _trees(Phrase p) {
	// the annotation comes from the phrase's memo rather than being built here
	Annotation document = p.memo(Phrase.coreNLP);

	try {
		// run the full parse on this text
		constituency_parse_pipeline.annotate(document);
	} catch (IllegalArgumentException | NullPointerException ignored) {
		/*
		 * Deliberately ignored.
		 *
		 * On extremely rare occasions (< 0.00000593% of passages) the parser fails with
		 *   java.lang.IllegalArgumentException: No head rule defined for SYM using class
		 *   edu.stanford.nlp.trees.SemanticHeadFinder in SYM-10
		 *
		 * More frequently it fails with
		 *   java.lang.NullPointerException
		 *   at edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder.findHead(RuleBasedCorefMentionFinder.java:276)
		 *
		 * Both are fatal for the passage, but neither is a big deal for the index.
		 */
	}

	return p.memo(Phrase.sentences)
			.stream()
			.map(sentence -> sentence.get(TreeAnnotation.class))
			.filter(Objects::nonNull)
			.collect(toList());
}
 
Example #24
Source File: ItalianStandardReadability.java    From tint with GNU General Public License v3.0 5 votes vote down vote up
public ItalianStandardReadability(Properties globalProperties, Properties localProperties, Annotation annotation) {
    super(globalProperties, localProperties, annotation);

    contentPosList.add("S");
    contentPosList.add("A");
    contentPosList.add("V");
    contentPosList.add("B");

    simplePosList.add("S");
    simplePosList.add("V");

    nonWordPosList.add("F");

    genericPosDescription.put("A", "Adjective");
    genericPosDescription.put("B", "Adverb");
    genericPosDescription.put("S", "Noun");
    genericPosDescription.put("E", "Preposition");
    genericPosDescription.put("C", "Conjunction");
    genericPosDescription.put("P", "Pronoun");
    genericPosDescription.put("R", "Determiner");
    genericPosDescription.put("F", "Punctuation");
    genericPosDescription.put("D", "Adj. (det.)");
    genericPosDescription.put("V", "Verb");
    genericPosDescription.put("X", "Other");
    genericPosDescription.put("N", "Number");

}
 
Example #25
Source File: JsonPipeline.java    From tac2015-event-detection with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts the coreference chains of {@code doc} into plain maps and lists.
 * Each entity is a map with "mentions" and "entityid"; each mention records its
 * sentence, token span, head, gender, animacy, number, mention type and id, with
 * sentence and token indices reduced by one. The chain's representative mention
 * additionally carries "representative" = true.
 *
 * @param doc an annotated document carrying {@code CorefChainAnnotation}
 * @return one entity map per coreference chain
 */
List getCorefInfo(Annotation doc) {
	Map<Integer, CorefChain> chainsById = doc.get(CorefChainAnnotation.class);
	List entities = new ArrayList();
	for (CorefChain chain : chainsById.values()) {
		List mentions = new ArrayList();
		CorefChain.CorefMention representative = chain.getRepresentativeMention();
		for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
			Map record = new HashMap();
			record.put("sentence", mention.sentNum - 1);
			record.put("tokspan_in_sentence",
					Lists.newArrayList(mention.startIndex - 1, mention.endIndex - 1));
			record.put("head", mention.headIndex - 1);
			record.put("gender", mention.gender.toString());
			record.put("animacy", mention.animacy.toString());
			record.put("number", mention.number.toString());
			record.put("mentiontype", mention.mentionType.toString());
			record.put("mentionid", mention.mentionID);

			boolean isRepresentative =
					representative != null && mention.mentionID == representative.mentionID;
			if (isRepresentative) {
				record.put("representative", true);
			}
			mentions.add(record);
		}

		Map entity = ImmutableMap.builder()
				.put("mentions", mentions)
				.put("entityid", chain.getChainID())
				.build();
		entities.add(entity);
	}
	return entities;
}
 
Example #26
Source File: StopwordAnnotatorTest.java    From coreNlp with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that tokens are flagged as stopwords exactly when their lower-cased word
 * is in the custom stopword list, and that the lemma flag is always false.
 *
 * @throws Exception if the pipeline or the stopword list cannot be set up
 */
@org.junit.Test
public void testCustomStopwordList() throws Exception {
    // coreNlp properties for stopwords, including the custom stopword list property
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, stopword");
    props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");
    props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);

    // the expected stopword set, built from the same custom list
    Set<?> stopWords = StopwordAnnotator.getStopWordList(Version.LUCENE_36, customStopWordList, true);

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(example);
    pipeline.annotate(document);

    for (CoreLabel token : document.get(CoreAnnotations.TokensAnnotation.class)) {
        // first = word is a stopword, second = lemma is a stopword
        Pair<Boolean, Boolean> flags = token.get(StopwordAnnotator.class);

        boolean expectedStopword = stopWords.contains(token.word().toLowerCase());
        if (expectedStopword) {
            assertTrue(flags.first());
        } else {
            assertFalse(flags.first());
        }

        // not checking lemma, so always false
        assertFalse(flags.second());
    }
}
 
Example #27
Source File: JsonPipeline.java    From tac2015-event-detection with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Runs the CoreNLP pipeline over {@code doctext} and returns the results as a JSON object
 * with a "sentences" entry and, when "dcoref" is among the annotators, an "entities" entry.
 * Also updates the running document, character and token counters.
 *
 * @param doctext the document text
 * @return the JSON form of the collected results
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
JsonNode processTextDocument(String doctext) {
	// start the clock on the first document
	if (startMilli == -1) {
		startMilli = System.currentTimeMillis();
	}
	numDocs++;
	numChars += doctext.length();

	Annotation annotated = new Annotation(doctext);
	pipeline.annotate(annotated);

	List<CoreMap> sentences = annotated.get(SentencesAnnotation.class);
	List<Map> sentenceRecords = Lists.newArrayList();
	for (CoreMap sentence : sentences) {
		Map<String,Object> record = Maps.newHashMap();
		addTokenBasics(record, sentence);
		numTokens += ((List) record.get("tokens")).size();
		for (String annotator : annotators()) {
			addAnnoToSentenceObject(record, sentence, annotator);
		}
		sentenceRecords.add(record);
	}

	ImmutableMap.Builder builder = new ImmutableMap.Builder();
	builder.put("sentences", sentenceRecords);
	if (Lists.newArrayList(annotators()).contains("dcoref")) {
		List corefEntities = getCorefInfo(annotated);
		builder.put("entities", corefEntities);
	}

	Map outDoc = builder.build();
	return JsonUtil.toJson(outDoc);
}
 
Example #28
Source File: VerbTest.java    From tint with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Annotates one Italian sentence with the custom ita_toksent, udpipe and ita_verb
 * annotators and prints the annotation as JSON. Any exception is reported with a
 * stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        Properties config = new Properties();
        config.setProperty("annotators", "ita_toksent, udpipe, ita_verb");

        // classes implementing the custom annotators
        config.setProperty("customAnnotatorClass.udpipe", "eu.fbk.fcw.udpipe.api.UDPipeAnnotator");
        config.setProperty("customAnnotatorClass.ita_toksent",
                "eu.fbk.dh.tint.tokenizer.annotators.ItalianTokenizerAnnotator");
        config.setProperty("customAnnotatorClass.ita_verb",
                "eu.fbk.dh.tint.verb.VerbAnnotator");

        // UDPipe settings
        config.setProperty("udpipe.server", "gardner");
        config.setProperty("udpipe.port", "50020");
        config.setProperty("udpipe.keepOriginal", "1");

        StanfordCoreNLP nlp = new StanfordCoreNLP(config);

        Annotation doc = new Annotation("Il caporale alpino Giampietro Civati caduto in combattimento il 5 dicembre 1944, come racconta Silvestri, ha scritto questo mirabile testamento: «sono figlio d’Italia, d’anni 21, non di Graziani e nemmeno di Badoglio, ma sono italiano e seguo la via che salverà l’onore d’Italia».");
        nlp.annotate(doc);

        String json = JSONOutputter.jsonPrint(doc);
        System.out.println(json);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #29
Source File: DigiCompMorphAnnotator.java    From tint with GNU General Public License v3.0 5 votes vote down vote up
/**
 * For every token, keeps the space-separated morphological analyses that start with
 * the token's lemma followed by "+" or "~" and stores them as {@code MorphoCompAnnotation}.
 * With several analyses the (possibly empty) filtered list is always stored; with a
 * single analysis the annotation is only set when that analysis matches.
 * Does nothing when the annotation has no sentences.
 *
 * @param annotation the document to annotate in place
 */
@Override
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String[] analyses = token.get(DigiMorphAnnotations.MorphoAnnotation.class).split(" ");
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            String plusPrefix = lemma + "+";
            String tildePrefix = lemma + "~";

            if (analyses.length > 1) {
                List<String> matching = new ArrayList<>();
                for (String analysis : analyses) {
                    if (analysis.startsWith(plusPrefix) || analysis.startsWith(tildePrefix)) {
                        matching.add(analysis);
                    }
                }
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class, matching);
            } else if (analyses[0].startsWith(plusPrefix) || analyses[0].startsWith(tildePrefix)) {
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class,
                        new ArrayList<String>(Arrays.asList(analyses[0])));
            }
        }
    }
}
 
Example #30
Source File: ItalianTokenizerAnnotator.java    From tint with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Tokenizes the text of the annotation into sentences of tokens and attaches the
 * result to the annotation through {@code Utils.addBasicAnnotations}.
 *
 * @param annotation the annotation whose {@code TextAnnotation} is tokenized
 */
@Override public void annotate(Annotation annotation) {
    String rawText = annotation.get(CoreAnnotations.TextAnnotation.class);
    List<List<CoreLabel>> tokensBySentence =
            tokenizer.parse(rawText, newlineIsSentenceBreak, tokenizeOnlyOnSpace, ssplitOnlyOnNewLine);
    Utils.addBasicAnnotations(annotation, tokensBySentence, rawText);
}