Java Code Examples for edu.stanford.nlp.util.PropertiesUtils#asProperties()

The following examples show how to use edu.stanford.nlp.util.PropertiesUtils#asProperties(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CoreNlpTokenizerTest.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Tests tokenization and sentence splitting only.
 *
 * <p>Builds a pipeline with just the {@code tokenize} and {@code ssplit} annotators, feeds a
 * two-sentence string through {@link CoreNlpTokenizer}, and checks the emitted terms, their
 * start/end character offsets, and their increments.
 *
 * @throws IOException if the tokenizer or the stream assertion raises one
 */
@Test
public void testBasic() throws IOException {
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));

    // 19 expected tokens: 6 in the first sentence, 13 in the second.
    String[] expectedTerms = { "Mary", "had", "a", "little", "lamb", ".", //
            "And", "everywhere", "that", "Mary", "went", ",", //
            "the", "lamb", "was", "sure", "to", "go", "." };
    // Character offsets into str; punctuation starts where the preceding word ends.
    int[] expectedStartOffsets = { 0, 5, 9, 11, 18, 22, //
            24, 28, 39, 44, 49, 53, //
            55, 59, 64, 68, 73, 76, 78 };
    int[] expectedEndOffsets = { 4, 8, 10, 17, 22, 23, //
            27, 38, 43, 48, 53, 54, //
            58, 63, 67, 72, 75, 78, 79 };
    // One increment per token (19 in total). Only "And", the first token of the second
    // sentence, carries the extra CoreNlpTokenizer.SENTENCE_GAP.
    int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, //
            1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
            1, 1, 1, 1, 1, 1, 1 };

    assertTokenStreamContents(tokenizer, expectedTerms, expectedStartOffsets, expectedEndOffsets,
            expectedIncrements);
}
 
Example 2
Source File: CoreNlpTokenizerTest.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Tests tokenization with part-of-speech tagging and lemmatization.
 *
 * <p>Builds a pipeline with the {@code tokenize}, {@code ssplit}, {@code pos} and {@code lemma}
 * annotators, feeds a two-sentence string through {@link CoreNlpTokenizer}, and checks that the
 * emitted terms are lemmas (for example {@code "had"} becomes {@code "have"}), together with
 * their start/end character offsets, type tags, and increments.
 *
 * @throws IOException if the tokenizer or the stream assertion raises one
 */
@Test
public void testWithLemma() throws IOException {
    // NOTE(review): "parse.model" is configured although no "parse" annotator is listed above;
    // presumably it is unused here -- confirm before removing.
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit,pos,lemma", //
            "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));

    // 19 expected terms, given as lemmas rather than the surface forms found in str.
    String[] expectedTerms = { "Mary", "have", "a", "little", "lamb", ".", //
            "and", "everywhere", "that", "Mary", "go", ",", //
            "the", "lamb", "be", "sure", "to", "go", "." };
    // Offsets still refer to the original surface text in str, not to the lemmas.
    int[] expectedStartOffsets = { 0, 5, 9, 11, 18, 22, //
            24, 28, 39, 44, 49, 53, //
            55, 59, 64, 68, 73, 76, 78 };
    int[] expectedEndOffsets = { 4, 8, 10, 17, 22, 23, //
            27, 38, 43, 48, 53, 54, //
            58, 63, 67, 72, 75, 78, 79 };
    // Expected type of each token (part-of-speech style tags).
    String[] expectedTypes = { "NNP", "VBD", "DT", "JJ", "NN", ".", //
            "CC", "RB", "IN", "NNP", "VBD", ",", //
            "DT", "NN", "VBD", "JJ", "TO", "VB", "." };
    // One increment per token (19 in total). Only the first token of the second sentence
    // carries the extra CoreNlpTokenizer.SENTENCE_GAP.
    int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, //
            1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
            1, 1, 1, 1, 1, 1, 1 };

    assertTokenStreamContents(tokenizer, expectedTerms, expectedStartOffsets, expectedEndOffsets,
            expectedTypes, expectedIncrements);
}
 
Example 3
Source File: CoreNlpTokenizerTest.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Tests tokenization with named-entity recognition enabled.
 *
 * <p>Builds a pipeline with the {@code tokenize}, {@code ssplit}, {@code pos}, {@code lemma} and
 * {@code ner} annotators, feeds a two-sentence string through {@link CoreNlpTokenizer}, and
 * checks the emitted lemma terms, their start/end character offsets, type tags, and increments.
 * Both {@code "Mary"} tokens are expected to have the type {@code "PERSON"}.
 *
 * @throws IOException if the tokenizer or the stream assertion raises one
 */
@Test
public void testWithNER() throws IOException {
    // NOTE(review): "parse.model" is configured although no "parse" annotator is listed above;
    // presumably it is unused here -- confirm before removing.
    AnnotationPipeline pipeline = new StanfordCoreNLP(PropertiesUtils.asProperties(//
            "annotators", "tokenize,ssplit,pos,lemma,ner", //
            "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz", //
            "tokenize.language", "en", //
            "tokenize.options", "americanize=true,asciiQuotes=true,ptb3Dashes=true,ptb3Ellipsis=true,untokenizable=noneKeep" //
    ));

    CoreNlpTokenizer tokenizer = new CoreNlpTokenizer(pipeline);
    String str = "Mary had a little lamb. And everywhere that Mary went, the lamb was sure to go.";
    tokenizer.setReader(new StringReader(str));

    // 19 expected terms, given as lemmas rather than the surface forms found in str.
    String[] expectedTerms = { "Mary", "have", "a", "little", "lamb", ".", //
            "and", "everywhere", "that", "Mary", "go", ",", //
            "the", "lamb", "be", "sure", "to", "go", "." };
    // Offsets still refer to the original surface text in str, not to the lemmas.
    int[] expectedStartOffsets = { 0, 5, 9, 11, 18, 22, //
            24, 28, 39, 44, 49, 53, //
            55, 59, 64, 68, 73, 76, 78 };
    int[] expectedEndOffsets = { 4, 8, 10, 17, 22, 23, //
            27, 38, 43, 48, 53, 54, //
            58, 63, 67, 72, 75, 78, 79 };
    // Expected type of each token: "PERSON" for both "Mary" tokens, part-of-speech style
    // tags for every other token.
    String[] expectedTypes = { "PERSON", "VBD", "DT", "JJ", "NN", ".", //
            "CC", "RB", "IN", "PERSON", "VBD", ",", //
            "DT", "NN", "VBD", "JJ", "TO", "VB", "." };
    // One increment per token (19 in total). Only the first token of the second sentence
    // carries the extra CoreNlpTokenizer.SENTENCE_GAP.
    int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, //
            1 + CoreNlpTokenizer.SENTENCE_GAP, 1, 1, 1, 1, 1, //
            1, 1, 1, 1, 1, 1, 1 };

    assertTokenStreamContents(tokenizer, expectedTerms, expectedStartOffsets, expectedEndOffsets,
            expectedTypes, expectedIncrements);
}
 
Example 4
Source File: LogicAnalysisTool.java    From Criteria2Query with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the tool and initializes {@code pipeline} with a {@code StanfordCoreNLP} instance
 * configured to run the annotators {@code tokenize,ssplit,pos,lemma,depparse}.
 */
public LogicAnalysisTool() {
	// Only the annotator list is configured; no "depparse.language" property is set.
	pipeline = new StanfordCoreNLP(
			PropertiesUtils.asProperties("annotators", "tokenize,ssplit,pos,lemma,depparse"));
}