edu.stanford.nlp.pipeline.StanfordCoreNLP Java Examples

The following examples show how to use edu.stanford.nlp.pipeline.StanfordCoreNLP. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConstituentExample.java    From blog-codes with Apache License 2.0 7 votes vote down vote up
public static void main(String[] args) {
	// set up pipeline properties
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
	// use faster shift reduce parser
	//props.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
	// cap sentence length so the constituency parser stays fast
	props.setProperty("parse.maxlen", "100");
	// set up Stanford CoreNLP pipeline
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	// build annotation for a review
	Annotation annotation = new Annotation("The small red car turned very quickly around the corner.");
	// annotate
	pipeline.annotate(annotation);
	// constituency tree of the first (only) sentence
	Tree tree = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(0)
			.get(TreeCoreAnnotations.TreeAnnotation.class);
	System.out.println(tree);
	// walk every constituent and report noun/verb phrases with their covered leaves
	Set<Constituent> treeConstituents = tree.constituents(new LabeledScoredConstituentFactory());
	for (Constituent constituent : treeConstituents) {
		if (constituent.label() == null) {
			continue;
		}
		// fetch the category once instead of calling label().toString() repeatedly
		String category = constituent.label().toString();
		if (category.equals("VP") || category.equals("NP")) {
			System.err.println("found constituent: " + constituent);
			// start()/end() are inclusive leaf indices, hence end() + 1 for subList
			System.err.println(tree.getLeaves().subList(constituent.start(), constituent.end() + 1));
		}
	}
}
 
Example #2
Source File: CorefExample.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) throws Exception {
	// document with three mentions of the same entity (Obama)
	Annotation document = new Annotation(
			"Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");
	// coref needs the full lexical/syntactic stack ahead of it in the pipeline
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	pipeline.annotate(document);
	System.out.println("---");
	System.out.println("coref chains");
	// document-level chains: one per resolved entity
	for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}
	// candidate mentions are reported sentence by sentence
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example #3
Source File: TestCustomLemmaAnnotator.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
@Test
public void test() {
	// register a custom annotator implementation under the name "custom.lemma"
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,custom.lemma");
	props.setProperty("customAnnotatorClass.custom.lemma", "com.fancyerii.blog.stanfordnlp.CustomLemmaAnnotator");
	props.setProperty("custom.lemma.lemmaFile", "custom-lemmas.txt");
	// build the pipeline with the custom annotator wired in
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

	CoreDocument exampleDocument = new CoreDocument("Some many goods there.");
	pipeline.annotate(exampleDocument);

	// print every token of the first sentence as word/lemma plus character offsets;
	// each token is a CoreLabel
	for (CoreLabel token : exampleDocument.sentences().get(0).tokens()) {
		System.out.println(token.word()+"/"+token.getString(LemmaAnnotation.class) + "\t" + token.beginPosition() + "\t" + token.endPosition());
	}
}
 
Example #4
Source File: StanfordTokenizer.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // reject documents whose language has no configured pipeline
  if (!languageMap.containsKey(aJCas.getDocumentLanguage())) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }

  // pick the per-language pipeline instance
  StanfordCoreNLP stanfordCoreNLP = stanfordCoreNLPs[languageMap.get(aJCas.getDocumentLanguage())];

  Annotation document = new Annotation(aJCas.getDocumentText());
  stanfordCoreNLP.annotate(document);

  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    // project each CoreNLP sentence into the CAS via its character offsets
    int begin = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int end = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    new Sentence(aJCas, begin, end).addToIndexes();

    // likewise one CAS Token per CoreNLP token
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      new Token(aJCas,
          token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
          token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)).addToIndexes();
    }
  }
}
 
Example #5
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
private static void usingStanfordPOSTagger() {
    // POS-tag theSentence with a caseless model, then dump the annotated document.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos");
    props.setProperty("pos.model", "C:\\Current Books in Progress\\NLP and Java\\Models\\english-caseless-left3words-distsim.tagger");
    // BUG FIX: props.put("pos.maxlen", 10) stored an Integer; Properties.getProperty
    // only sees String values, so the maxlen setting was silently ignored.
    props.setProperty("pos.maxlen", "10");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(theSentence);
    pipeline.annotate(document);

    // print each token as word/TAG, one sentence per line
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.print(word + "/" + pos + " ");
        }
        System.out.println();
    }

    // BUG FIX: this block used to run inside the sentence loop, printing the
    // ENTIRE document once per sentence; print it exactly once instead.
    try {
        pipeline.xmlPrint(document, System.out);
        pipeline.prettyPrint(document, System.out);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #6
Source File: Chapter3.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
private static void usingStanfordPipeline() {
    // minimal pipeline: tokenization plus sentence splitting only
    Properties properties = new Properties();
    properties.setProperty("annotators", "tokenize, ssplit");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);

    // annotate() mutates the Annotation in place ...
    Annotation annotation = new Annotation(paragraph);
    pipeline.annotate(annotation);
    pipeline.prettyPrint(annotation, System.out);

    // ... whereas process() builds and returns a fresh Annotation
    Annotation processed = pipeline.process(paragraph);
    System.out.println("----------");
    System.out.println(processed.size());
    System.out.println("----------");
    System.out.println(annotation);
    System.out.println("----------");
    System.out.println(annotation.toShorterString("NN"));
}
 
Example #7
Source File: SentimentDetector.java    From AIBlueprints with MIT License 6 votes vote down vote up
public SentimentDetector(Connection db) {
    this.db = db;
    // sentiment requires the constituency parse, hence the full annotator stack
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, parse, sentiment");
    pipeline = new StanfordCoreNLP(props);
    logger = Logger.getLogger("SentimentDetector");
    adjectives = new HashMap<String, Double>();
    // Load adjective sentiment weights; each line is "adjective<TAB>weight<TAB>...".
    // BUG FIX: try-with-resources closes the reader even when a read throws;
    // the original only closed it on the success path (resource leak).
    try (BufferedReader adjfile = new BufferedReader(
            new InputStreamReader(
                    new FileInputStream("adjectives/2000.tsv")))) {
        String line = adjfile.readLine();
        while (line != null) {
            String[] fields = line.split("\\t");
            if (fields.length == 3) {
                adjectives.put(fields[0], Double.parseDouble(fields[1]));
            }
            line = adjfile.readLine();
        }
    } catch (IOException e) {
        // best effort: the detector still works, just without the adjective table
        logger.log(Level.SEVERE, e.toString());
    }
}
 
Example #8
Source File: CorefTool.java    From Criteria2Query with Apache License 2.0 6 votes vote down vote up
public void extractCoref() {
	// sample eligibility-criteria text with pronoun and acronym references
	String text = "Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
	Annotation document = new Annotation(text);
	// coref requires the full stack up to parsing plus mention detection
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	pipeline.annotate(document);
	System.out.println("---");
	System.out.println("coref chains");
	// document-level chains: one per resolved entity
	for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}
	// candidate mentions, sentence by sentence
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example #9
Source File: MinIE.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/** 
 * Given an input sentence, parser, mode and a dictionary, make extractions and then minimize them accordingly.
 * The parsing occurs INSIDE this function.
 * 
 * @param sentence - input sentence
 * @param parser - dependency parse pipeline for the sentence
 * @param mode - minimization mode
 * @param d - dictionary (for MinIE-D)
 */
public void minimize(String sentence, StanfordCoreNLP parser, Mode mode, Dictionary d) {
    // Stage 1: run ClausIE over the dependency parse to obtain propositions
    ClausIE clausie = new ClausIE();
    clausie.setSemanticGraph(CoreNLPUtils.parse(parser, sentence));
    clausie.detectClauses();
    clausie.generatePropositions(clausie.getSemanticGraph());

    // Stage 2: carry ClausIE's results over and annotate polarity/modality
    this.setSemanticGraph(clausie.getSemanticGraph());
    this.setPropositions(clausie);
    this.setPolarity();
    this.setModality();

    // Stage 3: minimize according to the mode (COMPLETE performs no minimization;
    // the null guard mirrors the original if/else chain, which was a no-op for null)
    if (mode != null) {
        switch (mode) {
            case SAFE:
                this.minimizeSafeMode();
                break;
            case DICTIONARY:
                this.minimizeDictionaryMode(d.words());
                break;
            case AGGRESSIVE:
                this.minimizeAggressiveMode();
                break;
            default:
                break;
        }
    }

    this.removeDuplicates();
}
 
Example #10
Source File: Demo.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
public static void main(String args[]) {
    // Initialize the dependency-parsing pipeline once; it is reused per sentence
    StanfordCoreNLP parser = CoreNLPUtils.StanfordDepNNParser();

    // Input sentence
    String sentence = "The Joker believes that the hero Batman was not actually born in foggy Gotham City.";

    // Run MinIE in SAFE mode over the sentence
    MinIE extractor = new MinIE(sentence, parser, MinIE.Mode.SAFE);

    // Report every extracted proposition with factuality and attribution
    System.out.println("\nInput sentence: " + sentence);
    System.out.println("=============================");
    System.out.println("Extractions:");
    for (AnnotatedProposition proposition : extractor.getPropositions()) {
        System.out.println("\tTriple: " + proposition.getTripleAsString());
        System.out.print("\tFactuality: " + proposition.getFactualityAsString());
        // attribution phrase is optional
        if (proposition.getAttribution().getAttributionPhrase() != null)
            System.out.print("\tAttribution: " + proposition.getAttribution().toStringCompact());
        else
            System.out.print("\tAttribution: NONE");
        System.out.println("\n\t----------");
    }

    System.out.println("\n\nDONE!");
}
 
Example #11
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0 6 votes vote down vote up
@Test
public void sentenceParsingTest() {
    // sentence-parser preset with lemmatisation enabled
    NlpOptions nlpOptions = NlpOptions.sentenceParser(true);
    Properties properties = nlpOptions.getNlpProperties();
    assertTrue(properties.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, parse"));

    // only lemmatisation and parsing should be switched on
    assertTrue(nlpOptions.lemmatisation);
    assertTrue(nlpOptions.sentenceParser);
    assertFalse(nlpOptions.namedEntityRecognition);
    assertFalse(nlpOptions.namedEntityRecognitionRegex);
    assertFalse(nlpOptions.coreferenceAnalysis);
    assertFalse(nlpOptions.corefPostProcessing);
    assertTrue(nlpOptions.corefMaxSentenceDist == -1);

    // the options must still yield a usable analyzer
    StanfordCoreNLP analyzer = nlpOptions.buildNlpAnalyzer();
    assertNotNull(analyzer);
}
 
Example #12
Source File: KBPTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * A debugging method to try relation extraction from the console.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  // full KBP stack; regexner adds caseless and cased gazetteer matching
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

  // console REPL: annotate each entered line and dump its KBP triples
  IOUtils.console("sentence> ", line -> {
    Annotation annotation = new Annotation(line);
    pipeline.annotate(annotation);
    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
      // triples go to stderr, the sentence itself to stdout
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(System.err::println);
      System.out.println(sentence);
    }
  });
}
 
Example #13
Source File: JavaReExTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
public static void main(String[] args){
    try {
        Properties props = StringUtils.argsToProperties(args);
        // NOTE: the pipeline is built with DEFAULT annotators; props is only
        // handed to the RelationExtractorAnnotator below
        StanfordCoreNLP pipeline = new StanfordCoreNLP();
        Annotation doc = new Annotation("John Gerspach was named Chief Financial Officer of Citi in July 2009.");
        pipeline.annotate(doc);

        // run relation extraction as a standalone annotator pass
        RelationExtractorAnnotator extractor = new RelationExtractorAnnotator(props);
        extractor.annotate(doc);

        // report every relation mention found in each sentence
        for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            System.out.println("For sentence " + s.get(CoreAnnotations.TextAnnotation.class));
            for (RelationMention relation : s.get(RelationMentionsAnnotation.class)) {
                System.out.println(relation.toString());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
  }
 
Example #14
Source File: InteractiveDriver.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    // full KBP stack with gazetteer-based regexner matching
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    // whitelist of relation types worth reporting
    Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // console REPL: print whitelisted KBP triples for every entered sentence
    IOUtils.console("sentence> ", line -> {
        Annotation annotation = new Annotation(line);
        pipeline.annotate(annotation);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(triple -> {
                // keep only the relation types we asked for
                if (interested.contains(triple.relationGloss())) {
                    System.err.println(triple);
                }
            });
        }
    });
}
 
Example #15
Source File: StanfordCoreNLPTest.java    From java_in_examples with Apache License 2.0 6 votes vote down vote up
public static void main(String[] s) {
    Properties props = new Properties();
    // BUG FIX: the original annotator list lacked "sentiment", so the
    // SentimentClass lookup below always returned null. sentiment depends on
    // the constituency parse, which is already in the list.
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, sentiment, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" +
            "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" +
            "\"How do you know I am mad?\" said Alice.\n" +
            "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // print each sentence prefixed by its predicted sentiment label
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        System.out.println(sentiment + "\t" + sentence);
    }
}
 
Example #16
Source File: FramatDemo.java    From mateplus with GNU General Public License v2.0 6 votes vote down vote up
public FramatDemo(String[] commandlineoptions) throws ZipException, ClassNotFoundException, IOException {
	// parse command-line options for the complete pipeline
	FullPipelineOptions options = new CompletePipelineCMDLineOptions();
	options.parseCmdLineArgs(commandlineoptions);

	Parse.parseOptions = options.getParseOptions();
	// activate additional global features
	Parse.parseOptions.globalFeats = true;

	// glove embeddings are optional; wire them up only when a directory is given
	glove = (options.glovedir != null) ? new ExternalProcesses(options.glovedir) : null;

	// Stanford pipeline used for preprocessing
	Properties props = new Properties();
	props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
	preprocessor = new StanfordCoreNLP(props);

	// external parser/semafor servers are reached through netcat ("nc host port")
	mstparser = "nc " + options.mstserver;
	semafor = "nc " + options.semaforserver;

	pipeline = CompletePipeline.getCompletePipeline(options);
}
 
Example #17
Source File: FramatHttpPipeline.java    From mateplus with GNU General Public License v2.0 6 votes vote down vote up
public FramatHttpPipeline(SemanticRoleLabeler srl, ImageCache imageCache,
		L l, int sentenceMaxLength, HttpOptions options) {
	super(sentenceMaxLength, options);

	// external services are reached through netcat ("nc host port")
	semafor = "nc " + options.semaforserver;
	mstparser = "nc " + options.mstserver;

	this.srl = srl;
	this.defaultHandler = new DefaultHandler(l, this);
	this.imageCache = imageCache;

	// preprocessing pipeline with an explicit dcoref sieve configuration
	// (single literal below is byte-identical to the original concatenation)
	Properties props = new Properties();
	props.put("annotators",
			"tokenize, ssplit, pos, lemma, ner, parse, dcoref");
	props.put("dcoref.sievePasses",
			"MarkRole,DiscourseMatch,ExactStringMatch,RelaxedExactStringMatch,PreciseConstructs,StrictHeadMatch1,StrictHeadMatch2,StrictHeadMatch3,StrictHeadMatch4,RelaxedHeadMatch");
	pipeline = new StanfordCoreNLP(props);

	// glove embeddings are optional
	if (options.glovedir != null)
		glove = new ExternalProcesses(options.glovedir);

}
 
Example #18
Source File: Postprocess.java    From phrases with Apache License 2.0 6 votes vote down vote up
public List<Pattern> run(List<Pattern> patterns) {

        // Pipeline with sentiment analysis (sentiment requires the parse annotator).
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        // Annotate each pattern's sentences and walk the results.
        for (Pattern pattern : patterns) {
            Annotation annotation = pipeline.process(pattern.toSentences());
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    Tree tree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
                    // NOTE(review): the predicted sentiment class is computed but never
                    // stored or used -- looks like an unfinished implementation.
                    int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
                    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                        // NOTE(review): lemma is likewise computed and discarded.
                        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);

                    }
            }
        }
        // NOTE(review): always returns null instead of the processed patterns or an
        // empty list -- callers must null-check; confirm the intended contract.
        return null;
    }
 
Example #19
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0 6 votes vote down vote up
@Test
public void tokenizationOnlyTest() {
    // tokenization-only preset, lemmatisation disabled
    NlpOptions nlpOptions = NlpOptions.tokenizationOnly(false);
    Properties properties = nlpOptions.getNlpProperties();
    assertTrue(properties.getProperty("annotators").equals("tokenize, ssplit, pos"));

    // every optional analysis stage should be off
    assertFalse(nlpOptions.lemmatisation);
    assertFalse(nlpOptions.namedEntityRecognition);
    assertFalse(nlpOptions.namedEntityRecognitionRegex);
    assertFalse(nlpOptions.sentenceParser);
    assertFalse(nlpOptions.coreferenceAnalysis);
    assertFalse(nlpOptions.corefPostProcessing);
    assertTrue(nlpOptions.corefMaxSentenceDist == -1);

    // the options must still yield a usable analyzer
    StanfordCoreNLP analyzer = nlpOptions.buildNlpAnalyzer();
    assertNotNull(analyzer);
}
 
Example #20
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0 6 votes vote down vote up
@Test
public void tokenizationOnlyWithLemmaTest() {
    // tokenization-only preset, lemmatisation enabled
    NlpOptions nlpOptions = NlpOptions.tokenizationOnly(true);
    Properties properties = nlpOptions.getNlpProperties();
    assertTrue(properties.getProperty("annotators").equals("tokenize, ssplit, pos, lemma"));

    // only lemmatisation is switched on
    assertTrue(nlpOptions.lemmatisation);
    assertFalse(nlpOptions.namedEntityRecognition);
    assertFalse(nlpOptions.namedEntityRecognitionRegex);
    assertFalse(nlpOptions.sentenceParser);
    assertFalse(nlpOptions.coreferenceAnalysis);
    assertFalse(nlpOptions.corefPostProcessing);
    assertTrue(nlpOptions.corefMaxSentenceDist == -1);

    // the options must still yield a usable analyzer
    StanfordCoreNLP analyzer = nlpOptions.buildNlpAnalyzer();
    assertNotNull(analyzer);
}
 
Example #21
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0 6 votes vote down vote up
@Test
public void namedEntityRecognitionTest() {
    // NER preset without regexner and without coref
    NlpOptions nlpOptions = NlpOptions.namedEntityRecognition(false, false);
    Properties properties = nlpOptions.getNlpProperties();
    assertTrue(properties.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner"));

    // lemmatisation and NER on; everything else off
    assertTrue(nlpOptions.lemmatisation);
    assertTrue(nlpOptions.namedEntityRecognition);
    assertFalse(nlpOptions.namedEntityRecognitionRegex);
    assertFalse(nlpOptions.sentenceParser);
    assertFalse(nlpOptions.coreferenceAnalysis);
    assertFalse(nlpOptions.corefPostProcessing);
    assertTrue(nlpOptions.corefMaxSentenceDist == -1);

    // the options must still yield a usable analyzer
    StanfordCoreNLP analyzer = nlpOptions.buildNlpAnalyzer();
    assertNotNull(analyzer);
}
 
Example #22
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0 6 votes vote down vote up
@Test
public void namedEntityRecognitionWithRegexTest() {
    // NER preset with regexner enabled, coref disabled
    NlpOptions nlpOptions = NlpOptions.namedEntityRecognition(true, false);
    Properties properties = nlpOptions.getNlpProperties();
    assertTrue(properties.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner, regexner"));

    // lemmatisation plus both NER flavours on; parsing/coref off
    assertTrue(nlpOptions.lemmatisation);
    assertTrue(nlpOptions.namedEntityRecognition);
    assertTrue(nlpOptions.namedEntityRecognitionRegex);
    assertFalse(nlpOptions.sentenceParser);
    assertFalse(nlpOptions.coreferenceAnalysis);
    assertFalse(nlpOptions.corefPostProcessing);
    assertTrue(nlpOptions.corefMaxSentenceDist == -1);

    // the options must still yield a usable analyzer
    StanfordCoreNLP analyzer = nlpOptions.buildNlpAnalyzer();
    assertNotNull(analyzer);
}
 
Example #23
Source File: NlpOptionsTest.java    From coreNlp with Apache License 2.0 6 votes vote down vote up
@Test
public void namedEntitiesWithCoreferenceAnalysisTest() {
    NlpOptions options = NlpOptions.namedEntitiesWithCoreferenceAnalysis(true, maxSentenceDist, false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner, regexner, parse, dcoref"));
    assertTrue(options.lemmatisation);
    assertTrue(options.namedEntityRecognition);
    assertTrue(options.namedEntityRecognitionRegex);
    assertTrue(options.sentenceParser);
    assertTrue(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == maxSentenceDist);
    assertFalse(options.corefPostProcessing);

    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example #24
Source File: CoreNlpTokenizerFactory.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
public CoreNlpTokenizerFactory(Map<String, String> configuration) {
    super(configuration);
    // copy the whole configuration map into CoreNLP Properties; putAll is
    // equivalent to setProperty per entry because keys and values are Strings
    Properties properties = new Properties();
    properties.putAll(configuration);
    pipeline = new StanfordCoreNLP(properties);
}
 
Example #25
Source File: CoreNlpSegmentFactory.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
@Override
public Annotator build(Map<String, String> configurations) {
    // translate the configuration map into CoreNLP Properties wholesale;
    // putAll matches the original per-entry put() since all entries are Strings
    Properties properties = new Properties();
    properties.putAll(configurations);
    // StanfordCoreNLP itself implements Annotator
    return new StanfordCoreNLP(properties);
}
 
Example #26
Source File: CoreNlpTokenizerTest.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/** Test splitting only */
@Test
public void testBasic() throws IOException {
    // tokenize+ssplit only; tokenizer options normalize quotes, dashes and
    // ellipses and keep otherwise untokenizable characters
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));
    // expected surface tokens plus their character offsets into str
    assertTokenStreamContents(tokenizer, //
            new String[] { "Mary", "had", "a", "little", "lamb", ".", //
                    "And", "everywhere", "that", "Mary", "went", ",", //
                    "the", "lamb", "was", "sure", "to", "go", "." },
            // Start offsets:
            new int[] { 0, 5, 9, 11, 18, 22, //
                    24, 28, 39, 44, 49, 53, //
                    55, 59, 64, 68, 73, 76, 78 },
            // End offsets:
            new int[] { 4, 8, 10, 17, 22, 23, //
                    27, 38, 43, 48, 53, 54, //
                    58, 63, 67, 72, 75, 78, 79 },
            // Increments: the first token of the second sentence carries an extra
            // SENTENCE_GAP position increment to mark the sentence boundary
            new int[] { 1, 1, 1, 1, 1, 1, //
                    1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
                    1, 1, 1, 1, 1, 1, 1, 1 } //
    );
}
 
Example #27
Source File: CoreNlpTokenizerTest.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/** Test with part of speech and lemmatization */
@Test
public void testWithLemma() throws IOException {
    // NOTE(review): parse.model is configured but "parse" is not in the
    // annotator list -- presumably a leftover; confirm it has no effect here.
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit,pos,lemma", //
            "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));
    // with lemma enabled the emitted token text is the lemma ("had" -> "have"),
    // while offsets still point at the original surface form
    assertTokenStreamContents(tokenizer, //
            new String[] { "Mary", "have", "a", "little", "lamb", ".", //
                    "and", "everywhere", "that", "Mary", "go", ",", //
                    "the", "lamb", "be", "sure", "to", "go", "." },
            // Start offsets:
            new int[] { 0, 5, 9, 11, 18, 22, //
                    24, 28, 39, 44, 49, 53, //
                    55, 59, 64, 68, 73, 76, 78 },
            // End offsets:
            new int[] { 4, 8, 10, 17, 22, 23, //
                    27, 38, 43, 48, 53, 54, //
                    58, 63, 67, 72, 75, 78, 79 },
            // Types: the token type attribute carries the POS tag
            new String[] { "NNP", "VBD", "DT", "JJ", "NN", ".", //
                    "CC", "RB", "IN", "NNP", "VBD", ",", //
                    "DT", "NN", "VBD", "JJ", "TO", "VB", "." },
            // Increments: extra SENTENCE_GAP on the first token of sentence two
            new int[] { 1, 1, 1, 1, 1, 1, //
                    1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
                    1, 1, 1, 1, 1, 1, 1, 1 } //
    );
}
 
Example #28
Source File: CoreNlpTokenizerTest.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/** Test with NER */
@Test
public void testWithNER() throws IOException {
    // NOTE(review): parse.model is configured but "parse" is not in the
    // annotator list -- presumably a leftover; confirm it has no effect here.
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit,pos,lemma,ner", //
            "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));
    // token text is the lemma, as in testWithLemma
    assertTokenStreamContents(tokenizer, //
            new String[] { "Mary", "have", "a", "little", "lamb", ".", //
                    "and", "everywhere", "that", "Mary", "go", ",", //
                    "the", "lamb", "be", "sure", "to", "go", "." },
            // Start offsets:
            new int[] { 0, 5, 9, 11, 18, 22, //
                    24, 28, 39, 44, 49, 53, //
                    55, 59, 64, 68, 73, 76, 78 },
            // End offsets:
            new int[] { 4, 8, 10, 17, 22, 23, //
                    27, 38, 43, 48, 53, 54, //
                    58, 63, 67, 72, 75, 78, 79 },
            // Types: with NER enabled, recognized entities ("Mary" -> PERSON)
            // replace the POS tag in the type attribute; other tokens keep POS
            new String[] { "PERSON", "VBD", "DT", "JJ", "NN", ".", //
                    "CC", "RB", "IN", "PERSON", "VBD", ",", //
                    "DT", "NN", "VBD", "JJ", "TO", "VB", "." },
            // Increments: extra SENTENCE_GAP on the first token of sentence two
            new int[] { 1, 1, 1, 1, 1, 1, //
                    1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
                    1, 1, 1, 1, 1, 1, 1, 1 } //
    );
}
 
Example #29
Source File: CoreNlpSegmenterTestCase.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
@Override
protected Tokenizer getSegmenter() {
    try {
        // load the bundled Chinese pipeline configuration from the classpath
        Properties properties = new Properties();
        properties.load(this.getClass().getResourceAsStream("/StanfordCoreNLP-chinese.properties"));
        // wrap the CoreNLP pipeline in a Lucene-style tokenizer
        return new CoreNlpTokenizer(new StanfordCoreNLP(properties));
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}
 
Example #30
Source File: CoreNlpTokenizerTestCase.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
@Override
protected NlpTokenizer<? extends NlpToken> getTokenizer() {
    try {
        // pipeline configured from the bundled Chinese properties file on the classpath
        Properties properties = new Properties();
        properties.load(this.getClass().getResourceAsStream("/StanfordCoreNLP-chinese.properties"));
        StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
        return new CoreNlpTokenizer(pipeline);
    } catch (Exception exception) {
        // surface configuration problems as unchecked failures
        throw new RuntimeException(exception);
    }
}