package de.mirkosertic.desktopsearch;

import edu.stanford.nlp.pipeline.CoreDocument;
import edu.stanford.nlp.pipeline.CoreEntityMention;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import org.junit.Test;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.stream.Collectors;

/**
 * Smoke tests that run a Stanford CoreNLP pipeline with the
 * {@code tokenize,ssplit,pos,lemma,ner} annotators over a sample sentence and
 * print the detected entity mentions and per-token NER tags to standard out.
 *
 * <p>The tests contain no assertions; they only fail if loading the
 * configuration, building the pipeline or annotating the document throws.
 */
public class NERPipelineTest {

    /** Sample sentence annotated by both tests. */
    private static final String SAMPLE_TEXT =
            "Mirko Sertic lebt und wohnt in der Stadt Münster, Deutschland.";

    /**
     * Runs the pipeline configured by {@code /StanfordCoreNLP-german.properties}
     * over the sample sentence.
     *
     * @throws IOException if the properties resource is missing or cannot be read
     */
    @Test
    public void testGerman() throws IOException {
        runPipeline("/StanfordCoreNLP-german.properties", SAMPLE_TEXT);
    }

    /**
     * Runs the pipeline configured by {@code /StanfordCoreNLP.properties}
     * over the sample sentence.
     *
     * @throws IOException if the properties resource is missing or cannot be read
     */
    @Test
    public void testEnglish() throws IOException {
        // NOTE(review): this test feeds the German sample sentence to the pipeline loaded from
        // the non-German properties file. An English sample was present but commented out:
        // "Mirko Sertic is living in Münster, Germany. He is almost 40 years old. He likes
        // movies. It is strange how things go wrong in New York City."
        // Confirm which input is intended before adding assertions.
        runPipeline("/StanfordCoreNLP.properties", SAMPLE_TEXT);
    }

    /**
     * Loads the given properties resource, builds a pipeline from it, annotates
     * {@code text} and prints the entity mentions and the token/NER-tag pairs.
     *
     * @param propertiesResource classpath location of the base pipeline properties
     * @param text               the text to annotate
     * @throws IOException if the resource is missing or cannot be read
     */
    private static void runPipeline(final String propertiesResource, final String text)
            throws IOException {
        // set up pipeline properties
        final Properties props = loadProperties(propertiesResource);
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner");
        props.setProperty("ner.useSUTime", "false");
        props.setProperty("ner.applyFineGrained", "false");

        // set up pipeline; the single-letter markers trace progress through the stages
        System.out.println("A");
        final StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        // make an example document
        System.out.println("B");
        final CoreDocument doc = new CoreDocument(text);

        // annotate the document
        System.out.println("C");
        pipeline.annotate(doc);

        // view results
        System.out.println("D");
        System.out.println("---");
        System.out.println("entities found");
        for (final CoreEntityMention em : doc.entityMentions()) {
            System.out.println("\tdetected entity: \t" + em.text() + "\t" + em.entityType());
        }
        System.out.println("---");
        System.out.println("tokens and ner tags");
        final String tokensAndNERTags = doc.tokens().stream()
                .map(token -> "(" + token.word() + "," + token.ner() + ")")
                .collect(Collectors.joining(" "));
        System.out.println(tokensAndNERTags);
    }

    /**
     * Reads a properties file from the classpath, closing the underlying stream afterwards.
     *
     * @param resource classpath location of the properties file
     * @return the loaded properties
     * @throws IOException if the resource does not exist or cannot be read
     */
    private static Properties loadProperties(final String resource) throws IOException {
        final Properties props = new Properties();
        // Properties.load leaves the stream open, so it is closed here explicitly.
        try (InputStream in = NERPipelineTest.class.getResourceAsStream(resource)) {
            if (in == null) {
                // getResourceAsStream signals a missing resource with null rather than an exception
                throw new IOException("Classpath resource not found: " + resource);
            }
            props.load(in);
        }
        return props;
    }
}