edu.stanford.nlp.pipeline.StanfordCoreNLP Java Examples

The following examples show how to use edu.stanford.nlp.pipeline.StanfordCoreNLP. They are drawn from a number of open-source projects; the source file and license for each example are noted above its code.
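All of the examples share the same basic pattern: list the annotators to run in a Properties object, build a StanfordCoreNLP pipeline from it, wrap the input text in an Annotation, call annotate, and read the results back out of the annotation. A minimal sketch of that pattern (the annotator list and sample text here are illustrative, not taken from any of the projects below):

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class PipelineSketch {
    public static void main(String[] args) {
        // declare the annotators to run, in dependency order
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos");
        // building the pipeline loads the models once; reuse it across documents
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // wrap the raw text and annotate it
        Annotation document = new Annotation("Stanford CoreNLP is a Java NLP toolkit.");
        pipeline.annotate(document);
        // read tokens and part-of-speech tags back out of the annotation
        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                System.out.println(token.word() + "/" + token.tag());
            }
        }
    }
}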
Example #1
Source File: ConstituentExample.java    From blog-codes with Apache License 2.0
public static void main(String[] args) {
	// set up pipeline properties
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
	// use faster shift reduce parser
	//props.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
	props.setProperty("parse.maxlen", "100");
	// set up Stanford CoreNLP pipeline
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	// build an annotation for the example sentence
	Annotation annotation = new Annotation("The small red car turned very quickly around the corner.");
	// annotate
	pipeline.annotate(annotation);
	// get tree
	Tree tree = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(0)
			.get(TreeCoreAnnotations.TreeAnnotation.class);
	System.out.println(tree);
	Set<Constituent> treeConstituents = tree.constituents(new LabeledScoredConstituentFactory());
	for (Constituent constituent : treeConstituents) {
		if (constituent.label() != null
				&& (constituent.label().toString().equals("VP") || constituent.label().toString().equals("NP"))) {
			System.err.println("found constituent: " + constituent.toString());
			System.err.println(tree.getLeaves().subList(constituent.start(), constituent.end() + 1));
		}
	}
}
 
Example #2
Source File: Demo.java    From minie with GNU General Public License v3.0
public static void main(String[] args) {
    // Dependency parsing pipeline initialization
    StanfordCoreNLP parser = CoreNLPUtils.StanfordDepNNParser();
    
    // Input sentence
    String sentence = "The Joker believes that the hero Batman was not actually born in foggy Gotham City.";
    
    // Generate the extractions (with SAFE mode)
    MinIE minie = new MinIE(sentence, parser, MinIE.Mode.SAFE);
    
    // Print the extractions
    System.out.println("\nInput sentence: " + sentence);
    System.out.println("=============================");
    System.out.println("Extractions:");
    for (AnnotatedProposition ap: minie.getPropositions()) {
        System.out.println("\tTriple: " + ap.getTripleAsString());
        System.out.print("\tFactuality: " + ap.getFactualityAsString());
        if (ap.getAttribution().getAttributionPhrase() != null) 
            System.out.print("\tAttribution: " + ap.getAttribution().toStringCompact());
        else
            System.out.print("\tAttribution: NONE");
        System.out.println("\n\t----------");
    }
    
    System.out.println("\n\nDONE!");
}
 
Example #3
Source File: StanfordTokenizer.java    From ambiverse-nlu with Apache License 2.0
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  String text = aJCas.getDocumentText();
  Annotation document = new Annotation(text);
  StanfordCoreNLP stanfordCoreNLP;

  if(!languageMap.containsKey(aJCas.getDocumentLanguage())) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }

  stanfordCoreNLP = stanfordCoreNLPs[languageMap.get(aJCas.getDocumentLanguage())];

  stanfordCoreNLP.annotate(document);
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    int sstart = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int ssend = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    Sentence jsentence = new Sentence(aJCas, sstart, ssend);
    jsentence.addToIndexes();

    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      Token casToken = new Token(aJCas,
          token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
          token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
      casToken.addToIndexes();
    }
  }
}
 
Example #4
Source File: Chapter3.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingStanfordPipeline() {
        Properties properties = new Properties();
        properties.put("annotators", "tokenize, ssplit");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
        Annotation annotation = new Annotation(paragraph);
        pipeline.annotate(annotation);
        pipeline.prettyPrint(annotation, System.out);
//        try {
//            pipeline.xmlPrint(annotation, System.out);
//        } catch (IOException ex) {
//            ex.printStackTrace();
//        }
        Annotation a = pipeline.process(paragraph);
        System.out.println("----------");
        System.out.println(a.size());
        System.out.println("----------");
        System.out.println(annotation);
        System.out.println("----------");
        System.out.println(annotation.toShorterString("NN"));
//        TreePrint treePrint = pipeline.getConstituentTreePrinter();
//        treePrint = pipeline.getDependencyTreePrinter();
//        treePrint.printTree(new SimpleTree());
    }
 
Example #5
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License
private static void usingStanfordPOSTagger() {
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos");
    props.put("pos.model", "C:\\Current Books in Progress\\NLP and Java\\Models\\english-caseless-left3words-distsim.tagger");
    props.put("pos.maxlen", 10);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(theSentence);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.print(word + "/" + pos + " ");
        }
        System.out.println();
    }

    // print the annotated document once, rather than once per sentence
    try {
        pipeline.xmlPrint(document, System.out);
        pipeline.prettyPrint(document, System.out);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #6
Source File: TestCustomLemmaAnnotator.java    From blog-codes with Apache License 2.0
@Test
public void test() {
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,custom.lemma");
	props.setProperty("customAnnotatorClass.custom.lemma", "com.fancyerii.blog.stanfordnlp.CustomLemmaAnnotator");
	props.setProperty("custom.lemma.lemmaFile", "custom-lemmas.txt");
	// set up pipeline
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	
	CoreDocument exampleDocument = new CoreDocument("Some many goods there.");
	// annotate document
	pipeline.annotate(exampleDocument);
	// access tokens from a CoreDocument
	// a token is represented by a CoreLabel
	List<CoreLabel> firstSentenceTokens = exampleDocument.sentences().get(0).tokens();
	// this for loop will print out all of the tokens and the character offset info
	for (CoreLabel token : firstSentenceTokens) {
		System.out.println(token.word()+"/"+token.getString(LemmaAnnotation.class) + "\t" + token.beginPosition() + "\t" + token.endPosition());
	}
}
 
Example #7
Source File: SentimentDetector.java    From AIBlueprints with MIT License
public SentimentDetector(Connection db) {
    this.db = db;
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, parse, sentiment");
    pipeline = new StanfordCoreNLP(props);
    logger = Logger.getLogger("SentimentDetector");
    adjectives = new HashMap<String, Double>();
    // read the adjective sentiment lexicon; try-with-resources ensures the
    // file is closed even if a read fails
    try (BufferedReader adjfile = new BufferedReader(
            new InputStreamReader(
                    new FileInputStream("adjectives/2000.tsv")))) {
        String line = adjfile.readLine();
        while (line != null) {
            String[] fields = line.split("\\t");
            if (fields.length == 3) {
                adjectives.put(fields[0], Double.parseDouble(fields[1]));
            }
            line = adjfile.readLine();
        }
    } catch (IOException e) {
        logger.log(Level.SEVERE, e.toString());
    }
}
 
Example #8
Source File: CorefTool.java    From Criteria2Query with Apache License 2.0
public void extractCoref() {
	String s="Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
	 Annotation document = new Annotation(s);
	    Properties props = new Properties();
	    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
	    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	    pipeline.annotate(document);
	    System.out.println("---");
	    System.out.println("coref chains");
	    for (CorefChain cc : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
	      System.out.println("\t" + cc);
	    }
	    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
	      System.out.println("---");
	      System.out.println("mentions");
	      for (Mention m : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
	        System.out.println("\t" + m);
	       }
	    }
}
 
Example #9
Source File: MinIE.java    From minie with GNU General Public License v3.0
/** 
 * Given an input sentence, parser, mode and a dictionary, make extractions and then minimize them accordingly.
 * The parsing occurs INSIDE this function.
 * 
 * @param sentence - input sentence
 * @param parser - dependency parse pipeline for the sentence
 * @param mode - minimization mode
 * @param d - dictionary (for MinIE-D)
 */
public void minimize(String sentence, StanfordCoreNLP parser, Mode mode, Dictionary d) {
    // Run ClausIE first
    ClausIE clausie = new ClausIE();
    clausie.setSemanticGraph(CoreNLPUtils.parse(parser, sentence));
    clausie.detectClauses();
    clausie.generatePropositions(clausie.getSemanticGraph());
    
    // Start minimizing by annotating
    this.setSemanticGraph(clausie.getSemanticGraph());
    this.setPropositions(clausie);
    this.setPolarity();
    this.setModality();
    
    // Minimize according to the modes (COMPLETE mode doesn't minimize) 
    if (mode == Mode.SAFE)
        this.minimizeSafeMode();
    else if (mode == Mode.DICTIONARY)
        this.minimizeDictionaryMode(d.words());
    else if (mode == Mode.AGGRESSIVE)
        this.minimizeAggressiveMode();
    
    this.removeDuplicates();
}
 
Example #10
Source File: CorefExample.java    From blog-codes with Apache License 2.0
public static void main(String[] args) throws Exception {
	Annotation document = new Annotation(
			"Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	pipeline.annotate(document);
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain cc : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + cc);
	}
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention m : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + m);
		}
	}
}
 
Example #11
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0
@Test
public void sentenceParsingTest() {
    NlpOptions options = NlpOptions.sentenceParser(true);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, parse"));
    assertTrue(options.lemmatisation);
    assertFalse(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertTrue(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example #12
Source File: KBPTest.java    From InformationExtraction with GNU General Public License v3.0
/**
 * A debugging method to try relation extraction from the console.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
      System.out.println(sentence);
    }
  });
}
 
Example #13
Source File: InteractiveDriver.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(r -> {
                String relation = r.relationGloss();
                if (interested.contains(relation)) {
                    System.err.println(r);
                }
            });
        }
    });
}
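Each element of KBPTriplesAnnotation is a RelationTriple, which exposes the subject and object of the relation as well as the relation gloss used above. A short sketch of printing complete triples inside the same console loop, assuming the same pipeline setup plus an import of edu.stanford.nlp.ie.util.RelationTriple:

for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
    for (RelationTriple triple : sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
        // subjectGloss/relationGloss/objectGloss return the surface text of each slot
        System.out.println(triple.subjectGloss() + "\t"
                + triple.relationGloss() + "\t"
                + triple.objectGloss());
    }
}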
 
Example #14
Source File: StanfordCoreNLPTest.java    From java_in_examples with Apache License 2.0
public static void main(String[] s) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" +
            "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" +
            "\"How do you know I am mad?\" said Alice.\n" +
            "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        System.out.println(sentiment + "\t" + sentence);
    }
}
 
Example #15
Source File: FramatDemo.java    From mateplus with GNU General Public License v2.0
public FramatDemo(String[] commandlineoptions) throws ZipException, ClassNotFoundException, IOException {
	FullPipelineOptions options = new CompletePipelineCMDLineOptions();
	options.parseCmdLineArgs(commandlineoptions); // process options
	
	Parse.parseOptions = options.getParseOptions();
	Parse.parseOptions.globalFeats = true; // activate additional global features
	
	// set glove directory if available		
	glove = (options.glovedir != null) ? new ExternalProcesses(options.glovedir) : null;
	
	Properties props = new Properties();
	props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
	preprocessor = new StanfordCoreNLP(props); // initialize preprocessing		
	mstparser = "nc " + options.mstserver;
	semafor = "nc " + options.semaforserver;
	
	pipeline = CompletePipeline.getCompletePipeline(options); // initialize pipeline	
}
 
Example #16
Source File: FramatHttpPipeline.java    From mateplus with GNU General Public License v2.0
public FramatHttpPipeline(SemanticRoleLabeler srl, ImageCache imageCache,
		L l, int sentenceMaxLength, HttpOptions options) {
	super(sentenceMaxLength, options);
	
	semafor = "nc " + options.semaforserver;
	mstparser = "nc " + options.mstserver;
	
	this.srl = srl;
	this.defaultHandler = new DefaultHandler(l, this);
	this.imageCache = imageCache;

	Properties props = new Properties();
	props.put("annotators",
			"tokenize, ssplit, pos, lemma, ner, parse, dcoref");
	props.put("dcoref.sievePasses", "MarkRole," + "DiscourseMatch,"
			+ "ExactStringMatch," + "RelaxedExactStringMatch,"
			+ "PreciseConstructs," + "StrictHeadMatch1,"
			+ "StrictHeadMatch2," + "StrictHeadMatch3,"
			+ "StrictHeadMatch4," + "RelaxedHeadMatch");

	pipeline = new StanfordCoreNLP(props);

	if (options.glovedir != null)
		glove = new ExternalProcesses(options.glovedir);
}
 
Example #17
Source File: Postprocess.java    From phrases with Apache License 2.0
public List<Pattern> run(List<Pattern> patterns) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    for (Pattern pattern : patterns) {
        Annotation annotation = pipeline.process(pattern.toSentences());
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
            int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
                // sentiment and lemma are computed but not yet consumed;
                // the postprocessing in this excerpt is left unfinished
            }
        }
    }
    return null;
}
 
Example #18
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0
@Test
public void tokenizationOnlyTest() {
    NlpOptions options = NlpOptions.tokenizationOnly(false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos"));
    assertFalse(options.lemmatisation);
    assertFalse(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example #19
Source File: JavaReExTest.java    From InformationExtraction with GNU General Public License v3.0
public static void main(String[] args){
    try{
      Properties props = StringUtils.argsToProperties(args);
//      props.setProperty("annotators", "tokenize,ssplit,pos,lemma,parse,ner");
      // with no Properties argument, the pipeline falls back to its default annotator set
      StanfordCoreNLP pipeline = new StanfordCoreNLP();
      String sentence = "John Gerspach was named Chief Financial Officer of Citi in July 2009.";
      Annotation doc = new Annotation(sentence);
      pipeline.annotate(doc);
      RelationExtractorAnnotator r = new RelationExtractorAnnotator(props);
      r.annotate(doc);

      for(CoreMap s: doc.get(CoreAnnotations.SentencesAnnotation.class)){
        System.out.println("For sentence " + s.get(CoreAnnotations.TextAnnotation.class));
        List<RelationMention> rls  = s.get(RelationMentionsAnnotation.class);
        for(RelationMention rl: rls){
          System.out.println(rl.toString());
        }
      }
    }catch(Exception e){
      e.printStackTrace();
    }
  }
 
Example #20
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0
@Test
public void tokenizationOnlyWithLemmaTest() {
    NlpOptions options = NlpOptions.tokenizationOnly(true);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma"));
    assertTrue(options.lemmatisation);
    assertFalse(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example #21
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0
@Test
public void namedEntitiesWithCoreferenceAnalysisTest() {
    NlpOptions options = NlpOptions.namedEntitiesWithCoreferenceAnalysis(true, maxSentenceDist, false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner, regexner, parse, dcoref"));
    assertTrue(options.lemmatisation);
    assertTrue(options.namedEntityRecognition);
    assertTrue(options.namedEntityRecognitionRegex);
    assertTrue(options.sentenceParser);
    assertTrue(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == maxSentenceDist);
    assertFalse(options.corefPostProcessing);

    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example #22
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0
@Test
public void namedEntityRecognitionTest() {
    NlpOptions options = NlpOptions.namedEntityRecognition(false, false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner"));
    assertTrue(options.lemmatisation);
    assertTrue(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example #23
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0
@Test
public void namedEntityRecognitionWithRegexTest() {
    NlpOptions options = NlpOptions.namedEntityRecognition(true, false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner, regexner"));
    assertTrue(options.lemmatisation);
    assertTrue(options.namedEntityRecognition);
    assertTrue(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example #24
Source File: NLP.java    From FXDesktopSearch with Apache License 2.0
private static synchronized StanfordCoreNLP cachedPipeLine(final SupportedLanguage aLanguage, final Producer<StanfordCoreNLP> aCreator) {
    StanfordCoreNLP nlp = PIPELINES.get(aLanguage);
    if (nlp == null) {
        log.info("No cached pipeline for {}", aLanguage);
        nlp = aCreator.produce();
        PIPELINES.put(aLanguage, nlp);
        log.info("Pipeline created!");
    }
    return nlp;
}
 
Example #25
Source File: CoreNLPCorefResolver.java    From Heracles with GNU General Public License v3.0
@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfTextualUnit) {
		Properties prop1 = new Properties();
//		prop1.setProperty("annotators", "parse dcoref");
		prop1.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
		prop1.setProperty("coref.algorithm", "neural");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
//		Annotation document = new Annotation("Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");
		
		for (Span span : dataset.getSpans(spanTypeOfTextualUnit)){
//			TreeSet<Span> sentences = span.getDataset().getSpans(span, "sentence");
////			Framework.log("Sentences: "+sentences);
//			String reviewTextCorrected = "";
//			for (Span sentence : sentences){
//				reviewTextCorrected += " " + sentence.getAnnotations().get("text");
//			}
//			reviewTextCorrected = reviewTextCorrected.trim().replaceAll("  "," ");
			Framework.log(span.getAnnotation("text"));
			
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = new Annotation(span.getAnnotation("text", String.class));
//			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
			
			pipeline.annotate(a);
		
			System.out.println("coref chains");
		    for (CorefChain cc : a.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		      System.out.println("\t" + cc);
		    }
//		    for (CoreMap sentence : a.get(CoreAnnotations.SentencesAnnotation.class)) {
//		      System.out.println("---");
//		      System.out.println("mentions");
//		      for (Mention m : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
//		        System.out.println("\t" + m);
//		       }
//		    }
		}
	}
 
Example #26
Source File: CoreNLPParser.java    From Heracles with GNU General Public License v3.0
@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit) {
		Properties prop1 = new Properties();
		prop1.setProperty("annotators", "parse");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
		for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
//			Annotation a = new Annotation((String)span.getAnnotations().get("text"));
			if (a == null){
				// nothing to annotate for this span; skip it rather than
				// passing null to the pipeline
				continue;
			}
			pipeline.annotate(a);
			for (CoreMap sentence : a.get(SentencesAnnotation.class)){
				//per sentence, check the syntax tree
				Tree tree = sentence.get(TreeAnnotation.class);
//				tree.percolateHeadAnnotations(headFinder);
//				tree.indentedListPrint();
				
				try {
					analyzeTree(tree, span, wordIndex);
				} catch (IllegalSpanException e) {
					// TODO Auto-generated catch block
					e.printStackTrace();
				}
				
			}
			
		}

	}
 
Example #27
Source File: CoreNLPNamedEntityRecognizer.java    From Heracles with GNU General Public License v3.0
/**
	 * Process the Dataset in chunks, as defined by the <code>spanType</code> parameter.
	 * The Spans denoted by spanType must each contain Words belonging to a single sentence.
	 * 
	 */
	@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit){
		Properties prop1 = new Properties();
		prop1.setProperty("annotators", "ner");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
		for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
			if (a == null){
				// nothing to annotate for this span; skip it rather than
				// passing null to the pipeline
				continue;
			}
			pipeline.annotate(a);
			List<CoreMap> sentenceAnnotations = a.get(SentencesAnnotation.class);
			for (CoreMap sentence : sentenceAnnotations){
				for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
					
					Word w = wordIndex.get(token.get(CharacterOffsetBeginAnnotation.class));
					String ner = token.get(NamedEntityTagAnnotation.class);
					String nerValue = token.get(NormalizedNamedEntityTagAnnotation.class);
					if (ner!=null)
						w.putAnnotation("nerLabel", ner);
					if (nerValue!=null)
						w.putAnnotation("nerValue", nerValue);
					
//					System.out.println(w.getAnnotations());
				}
			}
		}		
	}
 
Example #28
Source File: CoreNLPLemmatizer.java    From Heracles with GNU General Public License v3.0
/**
	 * Process the Dataset in chunks, as defined by the <code>spanType</code> parameter.
	 * The Spans denoted by spanType must each contain Words belonging to a single sentence.
	 * 
	 */
	@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit){
		Properties prop1 = new Properties();
		prop1.setProperty("annotators", "lemma");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
		for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
			if (a == null){
				// nothing to annotate for this span; skip it rather than
				// passing null to the pipeline
				continue;
			}
			pipeline.annotate(a);
			List<CoreMap> sentenceAnnotations = a.get(SentencesAnnotation.class);
			for (CoreMap sentence : sentenceAnnotations){
				for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
					
					Word w = wordIndex.get(token.get(CharacterOffsetBeginAnnotation.class));
					String tempLemma = token.get(LemmaAnnotation.class);
					w.putAnnotation("lemma", tempLemma.toLowerCase());
//					System.out.println(w.getAnnotations());
				}
			}
		}		
	}
 
Example #29
Source File: SplitSentences.java    From tint with GNU General Public License v3.0
public static void main(String[] args) {
    try {
        final CommandLine cmd = CommandLine
                .parser()
                .withName("./annotate-sentences")
                .withHeader("Annotate sentences")
                .withOption("i", "input", "Input file", "FILE",
                        CommandLine.Type.FILE_EXISTING, true, false, true)
                .withOption("o", "output", "Output file", "FILE",
                        CommandLine.Type.FILE_EXISTING, true, false, true)
                .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

        File input = cmd.getOptionValue("input", File.class);
        File output = cmd.getOptionValue("output", File.class);

        String text = new String(Files.readAllBytes(input.toPath()), Charsets.UTF_8);
        BufferedWriter writer = new BufferedWriter(new FileWriter(output));

        Properties props = new Properties();
        props.setProperty("annotators", "ita_toksent");
        props.setProperty("customAnnotatorClass.ita_toksent",
                "eu.fbk.dh.tint.tokenizer.annotators.ItalianTokenizerAnnotator");

        StanfordCoreNLP ITApipeline = new StanfordCoreNLP(props);
        Annotation annotation = new Annotation(text);
        ITApipeline.annotate(annotation);

        List<CoreMap> sents = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap thisSent : sents) {
            writer.append(thisSent.get(CoreAnnotations.TextAnnotation.class)).append("\n");
        }

        writer.close();

    } catch (Exception e) {
        CommandLine.fail(e);
    }
}
 
Example #30
Source File: VerbTest.java    From tint with GNU General Public License v3.0
public static void main(String[] args) {
        try {
            Properties properties = new Properties();
            properties.setProperty("annotators", "ita_toksent, udpipe, ita_verb");
            properties.setProperty("customAnnotatorClass.udpipe", "eu.fbk.fcw.udpipe.api.UDPipeAnnotator");
            properties.setProperty("customAnnotatorClass.ita_toksent",
                    "eu.fbk.dh.tint.tokenizer.annotators.ItalianTokenizerAnnotator");
            properties.setProperty("customAnnotatorClass.ita_verb",
                    "eu.fbk.dh.tint.verb.VerbAnnotator");

            properties.setProperty("udpipe.server", "gardner");
            properties.setProperty("udpipe.port", "50020");
            properties.setProperty("udpipe.keepOriginal", "1");

//        properties.setProperty("udpipe.model", "/Users/alessio/Desktop/model");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);

            Annotation annotation = new Annotation("Il caporale alpino Giampietro Civati caduto in combattimento il 5 dicembre 1944, come racconta Silvestri, ha scritto questo mirabile testamento: «sono figlio d’Italia, d’anni 21, non di Graziani e nemmeno di Badoglio, ma sono italiano e seguo la via che salverà l’onore d’Italia».");
            pipeline.annotate(annotation);
            String out = JSONOutputter.jsonPrint(annotation);
            System.out.println(out);
//            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
//                System.out.println(sentence.get(VerbAnnotations.VerbsAnnotation.class));
//            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }