Java Code Examples for edu.stanford.nlp.pipeline.StanfordCoreNLP

The following examples show how to use edu.stanford.nlp.pipeline.StanfordCoreNLP. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: blog-codes   Source File: ConstituentExample.java    License: Apache License 2.0 7 votes vote down vote up
/**
 * Demonstrates constituency parsing: builds a CoreNLP pipeline, parses one
 * sentence, prints its parse tree, and then prints every NP/VP constituent
 * together with the leaves (words) it covers.
 */
public static void main(String[] args) {
	// set up pipeline properties
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
	// use faster shift reduce parser
	//props.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
	props.setProperty("parse.maxlen", "100");
	// set up Stanford CoreNLP pipeline
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
	// build annotation for a review
	Annotation annotation = new Annotation("The small red car turned very quickly around the corner.");
	// annotate
	pipeline.annotate(annotation);
	// constituency tree of the first (and only) sentence
	Tree tree = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(0)
			.get(TreeCoreAnnotations.TreeAnnotation.class);
	System.out.println(tree);
	Set<Constituent> treeConstituents = tree.constituents(new LabeledScoredConstituentFactory());
	for (Constituent constituent : treeConstituents) {
		if (constituent.label() != null
				&& (constituent.label().toString().equals("VP") || constituent.label().toString().equals("NP"))) {
			// FIX: the tree went to stdout but the matches went to stderr;
			// send all normal output to the same stream
			System.out.println("found constituent: " + constituent.toString());
			// constituent.end() is inclusive, hence end() + 1 for subList's exclusive bound
			System.out.println(tree.getLeaves().subList(constituent.start(), constituent.end() + 1));
		}
	}
}
 
Example 2
Source Project: blog-codes   Source File: CorefExample.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the full coreference pipeline on a short three-sentence text and
 * prints every coreference chain, then every detected mention per sentence.
 */
public static void main(String[] args) throws Exception {
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

	Annotation document = new Annotation(
			"Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");
	pipeline.annotate(document);

	// Coreference chains computed over the whole document.
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}

	// Mentions are stored per sentence.
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example 3
Source Project: blog-codes   Source File: TestCustomLemmaAnnotator.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void test() {
	// Pipeline that swaps the standard lemmatizer for a custom annotator
	// registered under the name "custom.lemma".
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,custom.lemma");
	props.setProperty("customAnnotatorClass.custom.lemma", "com.fancyerii.blog.stanfordnlp.CustomLemmaAnnotator");
	props.setProperty("custom.lemma.lemmaFile", "custom-lemmas.txt");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

	CoreDocument exampleDocument = new CoreDocument("Some many goods there.");
	pipeline.annotate(exampleDocument);

	// Print each token of the first sentence as word/lemma plus its
	// begin/end character offsets.
	for (CoreLabel token : exampleDocument.sentences().get(0).tokens()) {
		System.out.println(token.word()+"/"+token.getString(LemmaAnnotation.class) + "\t" + token.beginPosition() + "\t" + token.endPosition());
	}
}
 
Example 4
Source Project: ambiverse-nlu   Source File: StanfordTokenizer.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // Annotate the CAS document text with the pipeline configured for its language.
  String documentText = aJCas.getDocumentText();
  Annotation annotated = new Annotation(documentText);

  String language = aJCas.getDocumentLanguage();
  if (!languageMap.containsKey(language)) {
    throw new AnalysisEngineProcessException(new LanguageNotSupportedException("Language Not Supported"));
  }
  StanfordCoreNLP pipeline = stanfordCoreNLPs[languageMap.get(language)];
  pipeline.annotate(annotated);

  // Mirror CoreNLP sentences and tokens into CAS index structures,
  // carrying over the character offsets.
  for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
    int begin = sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int end = sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    new Sentence(aJCas, begin, end).addToIndexes();

    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      int tokenBegin = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
      int tokenEnd = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
      new Token(aJCas, tokenBegin, tokenEnd).addToIndexes();
    }
  }
}
 
Example 5
/**
 * Tags {@code theSentence} with parts of speech using a caseless POS model,
 * prints each token as word/TAG, then dumps the annotated document once as
 * XML and once pretty-printed.
 */
private static void usingStanfordPOSTagger() {
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos");
    props.put("pos.model", "C:\\Current Books in Progress\\NLP and Java\\Models\\english-caseless-left3words-distsim.tagger");
    // FIX: Properties must hold String values; an Integer stored via put() is
    // invisible to getProperty() and can break the property lookup
    props.put("pos.maxlen", "10");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(theSentence);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.print(word + "/" + pos + " ");
        }
        System.out.println();
    }

    // FIX: these print the ENTIRE document, so they belong outside the
    // per-sentence loop (the original re-printed the document once per sentence)
    try {
        pipeline.xmlPrint(document, System.out);
        pipeline.prettyPrint(document, System.out);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example 6
/**
 * Demonstrates the minimal tokenize/ssplit pipeline: pretty-prints an
 * annotated paragraph and shows a few ways of inspecting an Annotation.
 */
private static void usingStanfordPipeline() {
    Properties properties = new Properties();
    properties.put("annotators", "tokenize, ssplit");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);

    Annotation annotation = new Annotation(paragraph);
    pipeline.annotate(annotation);
    pipeline.prettyPrint(annotation, System.out);

    // process(...) is a convenience wrapper: it builds a fresh Annotation
    // from the text and annotates it in one call.
    Annotation processed = pipeline.process(paragraph);
    System.out.println("----------");
    System.out.println(processed.size());
    System.out.println("----------");
    System.out.println(annotation);
    System.out.println("----------");
    System.out.println(annotation.toShorterString("NN"));
}
 
Example 7
Source Project: AIBlueprints   Source File: SentimentDetector.java    License: MIT License 6 votes vote down vote up
/**
 * Builds the sentiment pipeline and loads per-adjective weights from
 * "adjectives/2000.tsv" (tab-separated; only 3-field rows are used:
 * adjective, weight, and a third field that is ignored).
 *
 * @param db open database connection kept for later detection steps
 */
public SentimentDetector(Connection db) {
    this.db = db;
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, parse, sentiment");
    pipeline = new StanfordCoreNLP(props);
    logger = Logger.getLogger("SentimentDetector");
    adjectives = new HashMap<String, Double>();
    // FIX: try-with-resources guarantees the reader is closed even when a
    // read throws (the original leaked it on any IOException mid-file)
    try (BufferedReader adjfile = new BufferedReader(
            new InputStreamReader(
                    new FileInputStream("adjectives/2000.tsv")))) {
        String line = adjfile.readLine();
        while (line != null) {
            String[] fields = line.split("\\t");
            if (fields.length == 3) {
                // NOTE(review): a malformed weight would throw
                // NumberFormatException uncaught — same as the original.
                adjectives.put(fields[0], Double.parseDouble(fields[1]));
            }
            line = adjfile.readLine();
        }
    } catch (IOException e) {
        logger.log(Level.SEVERE, e.toString());
    }
}
 
Example 8
Source Project: Criteria2Query   Source File: CorefTool.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs coreference resolution over a fixed clinical-trial eligibility text and
 * prints the resulting coreference chains and per-sentence mentions.
 */
public void extractCoref() {
	String s="Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

	Annotation document = new Annotation(s);
	pipeline.annotate(document);

	// Document-level coreference chains.
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}

	// Mentions, listed sentence by sentence.
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example 9
Source Project: minie   Source File: MinIE.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Parses the given sentence, extracts propositions with ClausIE, and then
 * minimizes them according to the requested mode. Parsing happens inside
 * this method.
 *
 * @param sentence input sentence
 * @param parser dependency-parse pipeline used to parse the sentence
 * @param mode minimization mode (COMPLETE performs no minimization)
 * @param d dictionary, used only by MinIE-D (DICTIONARY mode)
 */
public void minimize(String sentence, StanfordCoreNLP parser, Mode mode, Dictionary d) {
    // Extract clauses and propositions with ClausIE first.
    ClausIE clausie = new ClausIE();
    clausie.setSemanticGraph(CoreNLPUtils.parse(parser, sentence));
    clausie.detectClauses();
    clausie.generatePropositions(clausie.getSemanticGraph());

    // Transfer ClausIE's output into this instance and annotate it.
    this.setSemanticGraph(clausie.getSemanticGraph());
    this.setPropositions(clausie);
    this.setPolarity();
    this.setModality();

    // Apply the selected minimization strategy; any other mode (e.g.
    // COMPLETE) leaves the extractions untouched.
    if (mode == Mode.AGGRESSIVE) {
        this.minimizeAggressiveMode();
    } else if (mode == Mode.DICTIONARY) {
        this.minimizeDictionaryMode(d.words());
    } else if (mode == Mode.SAFE) {
        this.minimizeSafeMode();
    }

    this.removeDuplicates();
}
 
Example 10
Source Project: minie   Source File: Demo.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Demo: runs MinIE in SAFE mode over one sentence and prints each extracted
 * triple with its factuality and (optional) attribution.
 */
public static void main(String args[]) {
    // Initialize the dependency-parsing pipeline once.
    StanfordCoreNLP parser = CoreNLPUtils.StanfordDepNNParser();

    String sentence = "The Joker believes that the hero Batman was not actually born in foggy Gotham City.";

    // Run the extraction in SAFE mode.
    MinIE extractor = new MinIE(sentence, parser, MinIE.Mode.SAFE);

    System.out.println("\nInput sentence: " + sentence);
    System.out.println("=============================");
    System.out.println("Extractions:");
    for (AnnotatedProposition proposition : extractor.getPropositions()) {
        System.out.println("\tTriple: " + proposition.getTripleAsString());
        System.out.print("\tFactuality: " + proposition.getFactualityAsString());
        // Attribution is optional; fall back to a NONE marker.
        boolean hasAttribution = proposition.getAttribution().getAttributionPhrase() != null;
        if (hasAttribution) {
            System.out.print("\tAttribution: " + proposition.getAttribution().toStringCompact());
        } else {
            System.out.print("\tAttribution: NONE");
        }
        System.out.println("\n\t----------");
    }

    System.out.println("\n\nDONE!");
}
 
Example 11
Source Project: coreNlp   Source File: NlpOptionsTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void sentenceParsingTest() {
    // sentenceParser(true) should enable lemmatisation and parsing but leave
    // NER, regex NER, and coreference disabled.
    NlpOptions options = NlpOptions.sentenceParser(true);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, parse"));
    assertTrue(options.lemmatisation);
    assertFalse(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertTrue(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    // -1 means no limit on coref sentence distance (coref is off here anyway).
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    // The options must produce a usable pipeline instance.
    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example 12
/**
 * A debugging method to try relation extraction from the console.
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
  Properties props = StringUtils.argsToProperties(args);
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
  props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  // Read sentences from the console; for each one, print its KBP triples
  // (to stderr) followed by the sentence itself (to stdout).
  IOUtils.console("sentence> ", line -> {
    Annotation ann = new Annotation(line);
    pipeline.annotate(ann);
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
      sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(triple -> System.err.println(triple));
      System.out.println(sentence);
    }
  });
}
 
Example 13
/**
 * Demo of the RelationExtractorAnnotator: annotates one fixed sentence with
 * the default pipeline, runs relation extraction on it, and prints every
 * relation mention found per sentence.
 */
public static void main(String[] args){
    try{
      Properties props = StringUtils.argsToProperties(args);
      // Default-configured pipeline; the relation extractor is applied separately.
      StanfordCoreNLP pipeline = new StanfordCoreNLP();
      String sentence = "John Gerspach was named Chief Financial Officer of Citi in July 2009.";
      Annotation doc = new Annotation(sentence);
      pipeline.annotate(doc);

      RelationExtractorAnnotator relationExtractor = new RelationExtractorAnnotator(props);
      relationExtractor.annotate(doc);

      for(CoreMap s: doc.get(CoreAnnotations.SentencesAnnotation.class)){
        System.out.println("For sentence " + s.get(CoreAnnotations.TextAnnotation.class));
        for(RelationMention mention : s.get(RelationMentionsAnnotation.class)){
          System.out.println(mention.toString());
        }
      }
    }catch(Exception e){
      e.printStackTrace();
    }
  }
 
Example 14
/**
 * Console loop for KBP relation extraction that only reports a fixed set of
 * relations of interest (title, employment, top members/employees).
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

    // Relations we actually want to see; everything else is filtered out.
    Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            // Print (to stderr) only the triples whose relation is of interest.
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).forEach(triple -> {
                if (interested.contains(triple.relationGloss())) {
                    System.err.println(triple);
                }
            });
        }
    });
}
 
Example 15
Source Project: java_in_examples   Source File: StanfordCoreNLPTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a multi-sentence text and prints the sentiment class of each
 * sentence followed by the sentence itself.
 */
public static void main(String[] s) {
    Properties props = new Properties();
    // FIX: the loop below reads SentimentCoreAnnotations.SentimentClass, which
    // is only populated by the "sentiment" annotator; without it every
    // sentence's sentiment printed as null
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, sentiment, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" +
            "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" +
            "\"How do you know I am mad?\" said Alice.\n" +
            "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        System.out.println(sentiment + "\t" + sentence);
    }
}
 
Example 16
Source Project: mateplus   Source File: FramatDemo.java    License: GNU General Public License v2.0 6 votes vote down vote up
// Wires up the full Framat demo: parses command-line options, configures
// global parse features, optional GloVe support, a CoreNLP preprocessing
// pipeline, external parser/SEMAFOR server endpoints, and the complete
// SRL pipeline.
public FramatDemo(String[] commandlineoptions) throws ZipException, ClassNotFoundException, IOException {
	FullPipelineOptions options = new CompletePipelineCMDLineOptions();
	options.parseCmdLineArgs(commandlineoptions); // process options
	
	// NOTE(review): Parse.parseOptions is static/global state — this must run
	// before the pipeline below is constructed.
	Parse.parseOptions = options.getParseOptions();
	Parse.parseOptions.globalFeats = true; // activate additional global features
	
	// set glove directory if available		
	glove = (options.glovedir!=null)?new ExternalProcesses(options.glovedir):null;		
	
	Properties props = new Properties();
	props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
	preprocessor = new StanfordCoreNLP(props); // initialize preprocessing		
	// "nc host:port"-style commands for the external MST parser and SEMAFOR servers
	mstparser = "nc " + options.mstserver;
	semafor = "nc " + options.semaforserver;
	
	pipeline = CompletePipeline.getCompletePipeline(options); // initialize pipeline	
}
 
Example 17
Source Project: mateplus   Source File: FramatHttpPipeline.java    License: GNU General Public License v2.0 6 votes vote down vote up
// HTTP-serving variant of the Framat pipeline: stores the SRL model, request
// handler and image cache, configures external parser/SEMAFOR endpoints, and
// builds a CoreNLP pipeline with an explicit dcoref sieve ordering.
public FramatHttpPipeline(SemanticRoleLabeler srl, ImageCache imageCache,
		L l, int sentenceMaxLength, HttpOptions options) {
	super(sentenceMaxLength, options);
	
	// "nc host:port"-style commands for the external SEMAFOR and MST parser servers
	semafor = "nc " + options.semaforserver;
	mstparser = "nc " + options.mstserver;
	
	this.srl = srl;
	this.defaultHandler = new DefaultHandler(l, this);
	this.imageCache = imageCache;

	Properties props = new Properties();
	props.put("annotators",
			"tokenize, ssplit, pos, lemma, ner, parse, dcoref");
	// Explicit coreference sieve list, including the custom MarkRole sieve.
	props.put("dcoref.sievePasses", "MarkRole," + "DiscourseMatch,"
			+ "ExactStringMatch," + "RelaxedExactStringMatch,"
			+ "PreciseConstructs," + "StrictHeadMatch1,"
			+ "StrictHeadMatch2," + "StrictHeadMatch3,"
			+ "StrictHeadMatch4," + "RelaxedHeadMatch");

	pipeline = new StanfordCoreNLP(props);

	// GloVe support is optional; left unset when no directory is configured.
	if(options.glovedir!=null)
		glove = new ExternalProcesses(options.glovedir);
	
}
 
Example 18
Source Project: phrases   Source File: Postprocess.java    License: Apache License 2.0 6 votes vote down vote up
// Runs a sentiment pipeline over each pattern's sentence text.
// NOTE(review): the computed sentence sentiment and token lemmas are
// discarded, and the method always returns null rather than the pattern
// list — this looks unfinished; callers must tolerate a null return.
public List<Pattern> run(List<Pattern> patterns) {

        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, sentiment");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        for (Pattern pattern : patterns) {
            Annotation annotation = pipeline.process(pattern.toSentences());
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                    // Sentiment class predicted from the sentence's sentiment tree.
                    Tree tree = sentence.get(SentimentCoreAnnotations.AnnotatedTree.class);
                    int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
                    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);

                    }
            }
        }
        return null;
    }
 
Example 19
Source Project: coreNlp   Source File: NlpOptionsTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void tokenizationOnlyTest() {
    // tokenizationOnly(false): tokenization + POS only, no lemmatisation.
    NlpOptions options = NlpOptions.tokenizationOnly(false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos"));
    assertFalse(options.lemmatisation);
    assertFalse(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    // -1 means no limit on coref sentence distance (coref is off here anyway).
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    // The options must produce a usable pipeline instance.
    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example 20
Source Project: coreNlp   Source File: NlpOptionsTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void tokenizationOnlyWithLemmaTest() {
    // tokenizationOnly(true): same as tokenization-only but with lemmatisation.
    NlpOptions options = NlpOptions.tokenizationOnly(true);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma"));
    assertTrue(options.lemmatisation);
    assertFalse(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    // -1 means no limit on coref sentence distance (coref is off here anyway).
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    // The options must produce a usable pipeline instance.
    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example 21
Source Project: coreNlp   Source File: NlpOptionsTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void namedEntityRecognitionTest() {
    // namedEntityRecognition(false, false): NER enabled, regex NER disabled.
    NlpOptions options = NlpOptions.namedEntityRecognition(false, false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner"));
    assertTrue(options.lemmatisation);
    assertTrue(options.namedEntityRecognition);
    assertFalse(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    // -1 means no limit on coref sentence distance (coref is off here anyway).
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    // The options must produce a usable pipeline instance.
    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example 22
Source Project: coreNlp   Source File: NlpOptionsTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void namedEntityRecognitionWithRegexTest() {
    // namedEntityRecognition(true, false): NER plus regex-based NER enabled.
    NlpOptions options = NlpOptions.namedEntityRecognition(true, false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner, regexner"));
    assertTrue(options.lemmatisation);
    assertTrue(options.namedEntityRecognition);
    assertTrue(options.namedEntityRecognitionRegex);
    assertFalse(options.sentenceParser);
    assertFalse(options.coreferenceAnalysis);
    // -1 means no limit on coref sentence distance (coref is off here anyway).
    assertTrue(options.corefMaxSentenceDist == -1);
    assertFalse(options.corefPostProcessing);

    // The options must produce a usable pipeline instance.
    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example 23
Source Project: coreNlp   Source File: NlpOptionsTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void namedEntitiesWithCoreferenceAnalysisTest() {
    // Full configuration: NER, regex NER, parsing, and coreference with a
    // bounded sentence distance and no post-processing.
    NlpOptions options = NlpOptions.namedEntitiesWithCoreferenceAnalysis(true, maxSentenceDist, false);
    Properties props = options.getNlpProperties();
    assertTrue(props.getProperty("annotators").equals("tokenize, ssplit, pos, lemma, ner, regexner, parse, dcoref"));
    assertTrue(options.lemmatisation);
    assertTrue(options.namedEntityRecognition);
    assertTrue(options.namedEntityRecognitionRegex);
    assertTrue(options.sentenceParser);
    assertTrue(options.coreferenceAnalysis);
    assertTrue(options.corefMaxSentenceDist == maxSentenceDist);
    assertFalse(options.corefPostProcessing);

    // The options must produce a usable pipeline instance.
    StanfordCoreNLP nlp = options.buildNlpAnalyzer();
    assertNotNull(nlp);
}
 
Example 24
Source Project: jstarcraft-nlp   Source File: CoreNlpTokenizerFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the tokenizer factory's CoreNLP pipeline from the given
 * configuration map; every entry is copied into the pipeline's Properties.
 */
public CoreNlpTokenizerFactory(Map<String, String> configuration) {
    super(configuration);
    Properties properties = new Properties();
    configuration.forEach(properties::setProperty);
    pipeline = new StanfordCoreNLP(properties);
}
 
Example 25
Source Project: jstarcraft-nlp   Source File: CoreNlpSegmentFactory.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public Annotator build(Map<String, String> configurations) {
    // Copy every configuration entry into a Properties object and build
    // the CoreNLP pipeline (which implements Annotator) from it.
    Properties properties = new Properties();
    configurations.forEach(properties::put);
    return new StanfordCoreNLP(properties);
}
 
Example 26
Source Project: jstarcraft-nlp   Source File: CoreNlpTokenizerTest.java    License: Apache License 2.0 5 votes vote down vote up
/** Test splitting only */
@Test
public void testBasic() throws IOException {
    // Tokenize + sentence-split only: no POS/lemma, so surface forms are kept.
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));
    // Expected terms, character offsets, and position increments. The first
    // token of the second sentence ("And") carries an extra SENTENCE_GAP
    // position increment to mark the sentence boundary.
    assertTokenStreamContents(tokenizer, //
            new String[] { "Mary", "had", "a", "little", "lamb", ".", //
                    "And", "everywhere", "that", "Mary", "went", ",", //
                    "the", "lamb", "was", "sure", "to", "go", "." },
            // Start offsets:
            new int[] { 0, 5, 9, 11, 18, 22, //
                    24, 28, 39, 44, 49, 53, //
                    55, 59, 64, 68, 73, 76, 78 },
            // End offsets:
            new int[] { 4, 8, 10, 17, 22, 23, //
                    27, 38, 43, 48, 53, 54, //
                    58, 63, 67, 72, 75, 78, 79 },
            // Increments:
            new int[] { 1, 1, 1, 1, 1, 1, //
                    1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
                    1, 1, 1, 1, 1, 1, 1, 1 } //
    );
}
 
Example 27
Source Project: jstarcraft-nlp   Source File: CoreNlpTokenizerTest.java    License: Apache License 2.0 5 votes vote down vote up
/** Test with part of speech and lemmatization */
@Test
public void testWithLemma() throws IOException {
    // With pos+lemma, emitted terms are lemmas ("had"->"have", "was"->"be")
    // and the token type carries the POS tag.
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit,pos,lemma", //
            "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));
    assertTokenStreamContents(tokenizer, //
            new String[] { "Mary", "have", "a", "little", "lamb", ".", //
                    "and", "everywhere", "that", "Mary", "go", ",", //
                    "the", "lamb", "be", "sure", "to", "go", "." },
            // Start offsets:
            new int[] { 0, 5, 9, 11, 18, 22, //
                    24, 28, 39, 44, 49, 53, //
                    55, 59, 64, 68, 73, 76, 78 },
            // End offsets:
            new int[] { 4, 8, 10, 17, 22, 23, //
                    27, 38, 43, 48, 53, 54, //
                    58, 63, 67, 72, 75, 78, 79 },
            // Types (Penn Treebank POS tags):
            new String[] { "NNP", "VBD", "DT", "JJ", "NN", ".", //
                    "CC", "RB", "IN", "NNP", "VBD", ",", //
                    "DT", "NN", "VBD", "JJ", "TO", "VB", "." },
            // Increments (extra SENTENCE_GAP at the second sentence's start):
            new int[] { 1, 1, 1, 1, 1, 1, //
                    1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
                    1, 1, 1, 1, 1, 1, 1, 1 } //
    );
}
 
Example 28
Source Project: jstarcraft-nlp   Source File: CoreNlpTokenizerTest.java    License: Apache License 2.0 5 votes vote down vote up
/** Test with NER */
@Test
public void testWithNER() throws IOException {
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit,pos,lemma,ner", //
            "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));
    assertTokenStreamContents(tokenizer, //
            new String[] { "Mary", "have", "a", "little", "lamb", ".", //
                    "and", "everywhere", "that", "Mary", "go", ",", //
                    "the", "lamb", "be", "sure", "to", "go", "." },
            // Start offsets:
            new int[] { 0, 5, 9, 11, 18, 22, //
                    24, 28, 39, 44, 49, 53, //
                    55, 59, 64, 68, 73, 76, 78 },
            // End offsets:
            new int[] { 4, 8, 10, 17, 22, 23, //
                    27, 38, 43, 48, 53, 54, //
                    58, 63, 67, 72, 75, 78, 79 },
            // Types
            new String[] { "PERSON", "VBD", "DT", "JJ", "NN", ".", //
                    "CC", "RB", "IN", "PERSON", "VBD", ",", //
                    "DT", "NN", "VBD", "JJ", "TO", "VB", "." },
            // Increments:
            new int[] { 1, 1, 1, 1, 1, 1, //
                    1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
                    1, 1, 1, 1, 1, 1, 1, 1 } //
    );
}
 
Example 29
Source Project: jstarcraft-nlp   Source File: CoreNlpSegmenterTestCase.java    License: Apache License 2.0 5 votes vote down vote up
@Override
protected Tokenizer getSegmenter() {
    // Build a Chinese CoreNLP pipeline from the bundled properties file and
    // wrap it in a CoreNlpTokenizer. Any failure (missing resource, bad
    // properties) is rethrown unchecked, matching the interface's signature.
    try {
        Properties properties = new Properties();
        properties.load(this.getClass().getResourceAsStream("/StanfordCoreNLP-chinese.properties"));
        return new CoreNlpTokenizer(new StanfordCoreNLP(properties));
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}
 
Example 30
Source Project: jstarcraft-nlp   Source File: CoreNlpTokenizerTestCase.java    License: Apache License 2.0 5 votes vote down vote up
@Override
protected NlpTokenizer<? extends NlpToken> getTokenizer() {
    // Load the bundled Chinese pipeline configuration and wrap the resulting
    // CoreNLP annotator in a CoreNlpTokenizer; failures become unchecked.
    try {
        Properties properties = new Properties();
        properties.load(this.getClass().getResourceAsStream("/StanfordCoreNLP-chinese.properties"));
        return new CoreNlpTokenizer(new StanfordCoreNLP(properties));
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}