opennlp.tools.namefind.NameFinderME Java Examples

The following examples show how to use opennlp.tools.namefind.NameFinderME. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NamedEntityRecognitionUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Checks that the English person model finds the three person names in the sample text.
 *
 * @throws Exception if the model resource cannot be read or parsed
 */
@Test
public void givenEnglishPersonModel_whenNER_thenPersonsAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John is 26 years old. His best friend's name is Leonard. He has a sister named Penny.");

    // Release the model stream as soon as the model has been loaded from it.
    TokenNameFinderModel model;
    try (InputStream inputStreamNameFinder = getClass().getResourceAsStream("/models/en-ner-person.bin")) {
        model = new TokenNameFinderModel(inputStreamNameFinder);
    }
    NameFinderME nameFinderME = new NameFinderME(model);
    List<Span> spans = Arrays.asList(nameFinderME.find(tokens));
    assertThat(spans.toString()).isEqualTo("[[0..1) person, [13..14) person, [20..21) person]");

    // Rebuild every detected name by concatenating the tokens covered by its span.
    List<String> names = new ArrayList<String>();
    for (Span s : spans) {
        StringBuilder name = new StringBuilder();
        for (int index = s.getStart(); index < s.getEnd(); index++) {
            name.append(tokens[index]);
        }
        names.add(name.toString());
    }
    assertThat(names).contains("John","Leonard","Penny");
}
 
Example #2
Source File: Chapter1.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Demonstrates person-name detection: loads the person model from disk, tokenizes each
 * sample sentence and prints the names found in it. An I/O failure while loading the
 * model is reported by printing the stack trace.
 */
private static void nameFinderExample() {
    String[] sentences = {
        "Tim was a good neighbor. Perhaps not as good a Bob "
        + "Haywood, but still pretty good. Of course Mr. Adam "
        + "took the cake!"};
    try {
        Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
        File modelFile = new File("C:\\OpenNLP Models", "en-ner-person.bin");
        NameFinderME finder = new NameFinderME(new TokenNameFinderModel(modelFile));

        for (String sentence : sentences) {
            // Tokens of the current sentence
            String[] tokens = tokenizer.tokenize(sentence);

            // Token-index spans that the model considers names
            Span[] nameSpans = finder.find(tokens);

            // Turn the spans back into text and print them
            String[] names = Span.spansToStrings(nameSpans, tokens);
            System.out.println(Arrays.toString(names));
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #3
Source File: OpenNlpDoccatRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a document-categorization model from the given CASes and stores it in the
 * recommender context under {@code KEY_MODEL}. With fewer than two samples nothing is
 * trained and only an info message is logged.
 *
 * @param aContext context that receives the trained model
 * @param aCasses CASes from which the training samples are extracted
 * @throws RecommendationException declared by the overridden method
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<DocumentSample> samples = extractSamples(aCasses);

    if (samples.size() >= 2) {
        // The beam size limits how many results can be returned. Even when the user asks
        // for only a few results, never go below the default beam size recommended by
        // OpenNLP.
        int beamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE);

        TrainingParameters trainingParams = traits.getParameters();
        trainingParams.put(BeamSearch.BEAM_SIZE_PARAMETER, String.valueOf(beamSize));

        DoccatModel trainedModel = train(samples, trainingParams);
        aContext.put(KEY_MODEL, trainedModel);
    }
    else {
        LOG.info("Not enough training data: [{}] items", samples.size());
    }
}
 
Example #4
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a name-finder model from the given CASes and stores it in the recommender
 * context under {@code KEY_MODEL}. With fewer than two name samples nothing is trained
 * and only an info message is logged.
 *
 * @param aContext context that receives the trained model
 * @param aCasses CASes from which the name samples are extracted
 * @throws RecommendationException declared by the overridden method
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<NameSample> samples = extractNameSamples(aCasses);

    boolean enoughData = samples.size() >= 2;
    if (!enoughData) {
        LOG.info("Not enough training data: [{}] items", samples.size());
        return;
    }

    // The beam size limits how many results can be returned. Even when the user asks
    // for only a few results, never go below the default beam size recommended by
    // OpenNLP.
    int beamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE);

    TrainingParameters trainingParams = traits.getParameters();
    trainingParams.put(BeamSearch.BEAM_SIZE_PARAMETER, String.valueOf(beamSize));

    TokenNameFinderModel trainedModel = train(samples, trainingParams);
    aContext.put(KEY_MODEL, trainedModel);
}
 
Example #5
Source File: TestNER.java    From Mutters with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the address model finds exactly one address in the sample request and
 * that it is rendered as the expected token string.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testAddressNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL addressModelUrl = loader.getResource("models/en-ner-address.bin");
  assertThat(addressModelUrl, is(notNullValue()));

  TokenNameFinderModel addressModel = new TokenNameFinderModel(addressModelUrl);
  assertThat(addressModel, is(notNullValue()));

  NameFinderME finder = new NameFinderME(addressModel);
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] words = tokenizer.tokenize("Send a taxi to 12 Pleasent Street");
  Span[] found = finder.find(words);
  assertThat(found.length, is(1));

  String[] addresses = Span.spansToStrings(found, words);
  assertThat(addresses.length, is(1));
  assertThat(addresses[0], is("12 Pleasent Street"));
}
 
Example #6
Source File: TestNER.java    From Mutters with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the dates model finds exactly one date expression ("today") in the
 * sample sentence.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testDateNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL dateModelUrl = loader.getResource("models/en-ner-dates.bin");
  assertThat(dateModelUrl, is(notNullValue()));

  TokenNameFinderModel dateModel = new TokenNameFinderModel(dateModelUrl);
  assertThat(dateModel, is(notNullValue()));

  NameFinderME finder = new NameFinderME(dateModel);
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] words = tokenizer.tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(words.length, is(15));

  Span[] found = finder.find(words);
  assertThat(found.length, is(1));

  String[] dates = Span.spansToStrings(found, words);
  assertThat(dates.length, is(1));
  assertThat(dates[0], is("today"));
}
 
Example #7
Source File: TestNER.java    From Mutters with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the locations model finds the two place names in the sample sentence,
 * in order of appearance.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testLocationNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL locationModelUrl = loader.getResource("models/en-ner-locations.bin");
  assertThat(locationModelUrl, is(notNullValue()));

  TokenNameFinderModel locationModel = new TokenNameFinderModel(locationModelUrl);
  assertThat(locationModel, is(notNullValue()));

  NameFinderME finder = new NameFinderME(locationModel);
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] words = tokenizer.tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(words.length, is(15));

  Span[] found = finder.find(words);
  assertThat(found.length, is(2));

  String[] places = Span.spansToStrings(found, words);
  assertThat(places.length, is(2));
  assertThat(places[0], is("New York"));
  assertThat(places[1], is("London"));
}
 
Example #8
Source File: NameFinderFactory.java    From wiseowl with MIT License 6 votes vote down vote up
/**
 * Loads one {@link NameFinderME} per model file found for the given language and
 * records the matching model name at the same index in {@code modelNames}.
 *
 * @param language language passed on to the model lookup and model-name derivation
 * @param modelDirectory directory passed on to the model lookup
 * @throws IOException if a model file cannot be opened or read
 */
protected void loadNameFinders(String language, String modelDirectory) throws IOException {
  //<start id="maxent.examples.namefinder.setup"/> 
  File[] models //<co id="nfe.findmodels"/>
    = findNameFinderModels(language, modelDirectory);
  modelNames = new String[models.length];
  finders = new NameFinderME[models.length];

  for (int fi = 0; fi < models.length; fi++) {
    File modelFile = models[fi];
    modelNames[fi] = modelNameFromFile(language, modelFile); //<co id="nfe.modelname"/>

    log.info("Loading model {}", modelFile);
    // Close each model file once its model has been read, so no file handle is leaked.
    try (InputStream modelStream = new FileInputStream(modelFile)) {
      TokenNameFinderModel model = //<co id="nfe.modelreader"/>
          new TokenNameFinderModel(modelStream);
      finders[fi] = new NameFinderME(model);
    }
  }

}
 
Example #9
Source File: TestNER.java    From Mutters with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the persons model finds the two full person names in the sample
 * sentence, in order of appearance.
 *
 * @throws Exception if the model cannot be loaded
 */
@Test
public void testPersonNER() throws Exception
{
  ClassLoader loader = Thread.currentThread().getContextClassLoader();
  URL personModelUrl = loader.getResource("models/en-ner-persons.bin");
  assertThat(personModelUrl, is(notNullValue()));

  TokenNameFinderModel personModel = new TokenNameFinderModel(personModelUrl);
  assertThat(personModel, is(notNullValue()));

  NameFinderME finder = new NameFinderME(personModel);
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] words = tokenizer.tokenize("Mr. John Smith of New York, married Anne Green of London today.");
  assertThat(words.length, is(15));

  Span[] found = finder.find(words);
  assertThat(found.length, is(2));

  String[] persons = Span.spansToStrings(found, words);
  assertThat(persons.length, is(2));
  assertThat(persons[0], is("John Smith"));
  assertThat(persons[1], is("Anne Green"));
}
 
Example #10
Source File: OpenNlpService.java    From elasticsearch-ingest-opennlp with Apache License 2.0 6 votes vote down vote up
/**
 * Tokenizes the content and runs the name finder model registered for the given field.
 *
 * @param content text to analyse
 * @param field key of the name finder model to use
 * @return the tokens of the content together with the entity spans found in them
 * @throws ElasticsearchException if no model is registered for {@code field}
 */
public ExtractedEntities find(String content, String field) {
    try {
        if (!nameFinderModels.containsKey(field)) {
            // The message uses a plain space between "field" and the placeholder.
            throw new ElasticsearchException("Could not find field [{}], possible values {}", field, nameFinderModels.keySet());
        }
        TokenNameFinderModel finderModel = nameFinderModels.get(field);
        // NOTE(review): the thread-local is set here and cleared in the finally block;
        // nothing in this method reads it in between - confirm whether it is still needed.
        if (threadLocal.get() == null || !threadLocal.get().equals(finderModel)) {
            threadLocal.set(finderModel);
        }

        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
        Span[] spans = new NameFinderME(finderModel).find(tokens);

        return new ExtractedEntities(tokens, spans);
    } finally {
        threadLocal.remove();
    }
}
 
Example #11
Source File: Chapter4.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 5 votes vote down vote up
/**
 * Runs the person, location and organization models over every entry of
 * {@code sentences} and prints one line per detected span. Any failure is reported by
 * printing the stack trace.
 */
private static void usingMultipleNERModels() {
    // Models - en-ner-person.bin en-ner-location.bin en-ner-money.bin 
    // en-ner-organization.bin en-ner-time.bin
    try {
        // Load the tokenizer model and close its file as soon as it has been read.
        Tokenizer tokenizer;
        try (InputStream tokenStream = new FileInputStream(
                new File(getModelDir(), "en-token.bin"))) {
            TokenizerModel tokenModel = new TokenizerModel(tokenStream);
            tokenizer = new TokenizerME(tokenModel);
        }

        String modelNames[] = {"en-ner-person.bin", "en-ner-location.bin",
            "en-ner-organization.bin"};
        ArrayList<String> list = new ArrayList<>();
        for (String name : modelNames) {
            // Each entity model file is likewise closed right after loading.
            TokenNameFinderModel entityModel;
            try (InputStream modelStream = new FileInputStream(
                    new File(getModelDir(), name))) {
                entityModel = new TokenNameFinderModel(modelStream);
            }
            NameFinderME nameFinder = new NameFinderME(entityModel);
            for (int index = 0; index < sentences.length; index++) {
                String tokens[] = tokenizer.tokenize(sentences[index]);
                Span nameSpans[] = nameFinder.find(tokens);
                for (Span span : nameSpans) {
                    // Only the first token of the span is reported as the entity text.
                    list.add("Sentence: " + index
                            + " Span: " + span.toString() + " Entity: "
                            + tokens[span.getStart()]);
                }
            }
        }
        System.out.println("Multiple Entities");
        for (String element : list) {
            System.out.println(element);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
 
Example #12
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Splits the passage into sentences, prints the named entities found in each sentence,
 * and returns the top parse of every sentence.
 *
 * @param p passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared for the model initialisation done by {@code init()}
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized)init();
	//initialize
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	NameFinderME nameFinder = new NameFinderME(this.nerModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); // advance percentage
	//There are several tokenizers available. SimpleTokenizer works best
	Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
	//find sentences, tokenize each, parse each, return top parse for each
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i = 0; i < sentences.length; i++) {
		// Tokenize only the current sentence; token spans keep the character offsets.
		Span[] tokenSpans = tokenizer.tokenizePos(sentences[i]);
		String[] tokens = Span.spansToStrings(tokenSpans, sentences[i]);

		// Print each name using the character offsets of its first and last token.
		Span[] names = nameFinder.find(tokens);
		for (int ni = 0; ni < names.length; ni++) {
			int nameStart = tokenSpans[names[ni].getStart()].getStart();
			int nameEnd = tokenSpans[names[ni].getEnd() - 1].getEnd();
			System.out.println(sentences[i].substring(nameStart, nameEnd));
		}

		// Join the tokens (not the tokenizer object) into the line handed to the parser.
		String sent = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
		results[i] = sentResults[0];
	}
	return results;
}
 
Example #13
Source File: BasicActions.java    From knowledge-extraction with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the English NER model from the classpath, runs it over the tokens returned by
 * {@code testTokenizer()} and prints the resulting spans. An I/O failure is reported
 * by printing the stack trace.
 */
@Test
public void testNameFinder(){
	ClassLoader loader = BasicActions.class.getClassLoader();
	try (InputStream modelIn = loader.getResourceAsStream(Consts.EN_NER_MODEL)) {
		NameFinderME nameFinder = new NameFinderME(new TokenNameFinderModel(modelIn));
		Span[] nameSpans = nameFinder.find(testTokenizer());
		String rendered = Arrays.toString(nameSpans);
		System.out.println(rendered);
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Example #14
Source File: NameFilter.java    From wiseowl with MIT License 5 votes vote down vote up
public NameFilter(TokenStream in,String[] modelNames, NameFinderME[] finders) {
  super(in);
  this.tokenizer = SimpleTokenizer.INSTANCE;
  this.finders = finders;
  this.tokenTypeNames = new String[modelNames.length];
  for (int i=0; i < modelNames.length; i++) {
    tokenTypeNames[i] = NE_PREFIX + modelNames[i];
  }
}
 
Example #15
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Trains an OpenNLP name finder model from the given samples.
 *
 * @param aNameSamples training samples
 * @param aParameters training parameters handed to OpenNLP
 * @return the trained model
 * @throws RecommendationException if training fails with an I/O error
 */
private TokenNameFinderModel train(List<NameSample> aNameSamples,
        TrainingParameters aParameters)
    throws RecommendationException
{
    try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
        TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
        return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
    } catch (IOException e) {
        LOG.error("Exception during training the OpenNLP Named Entity Recognizer model.", e);
        // This trains a name finder, so the message names the NER model rather than "pos".
        throw new RecommendationException(
                "Error while training OpenNLP Named Entity Recognizer model", e);
    }
}
 
Example #16
Source File: Chapter4.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 5 votes vote down vote up
/**
 * Trains a person name finder model from {@code en-ner-person.train} and serializes it
 * to {@code modelFile}. An I/O failure is reported by printing the stack trace.
 */
private static void trainingOpenNLPNERModel() {
    // Both the model output and the training input are closed when the block exits.
    try (OutputStream modelOutputStream = new BufferedOutputStream(
            new FileOutputStream(new File("modelFile")));
            FileInputStream trainingStream = new FileInputStream("en-ner-person.train")) {
        ObjectStream<String> lineStream = new PlainTextByLineStream(
                trainingStream, "UTF-8");
        ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);

        TokenNameFinderModel model = NameFinderME.train("en", "person", sampleStream,
                null, 100, 5);

        model.serialize(modelOutputStream);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #17
Source File: Chapter4.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 5 votes vote down vote up
/**
 * Runs the person, location and organization models over every entry of
 * {@code sentences} and prints one line per detected span. Any failure is reported by
 * printing the stack trace.
 */
private static void usingMultipleNERModels() {
    // Models - en-ner-person.bin en-ner-location.bin en-ner-money.bin 
    // en-ner-organization.bin en-ner-time.bin
    try {
        // Load the tokenizer model and close its file as soon as it has been read.
        Tokenizer tokenizer;
        try (InputStream tokenStream = new FileInputStream(
                new File(getModelDir(), "en-token.bin"))) {
            TokenizerModel tokenModel = new TokenizerModel(tokenStream);
            tokenizer = new TokenizerME(tokenModel);
        }

        String modelNames[] = {"en-ner-person.bin", "en-ner-location.bin",
            "en-ner-organization.bin"};
        ArrayList<String> list = new ArrayList<>();
        for (String name : modelNames) {
            // Each entity model file is likewise closed right after loading.
            TokenNameFinderModel entityModel;
            try (InputStream modelStream = new FileInputStream(
                    new File(getModelDir(), name))) {
                entityModel = new TokenNameFinderModel(modelStream);
            }
            NameFinderME nameFinder = new NameFinderME(entityModel);
            for (int index = 0; index < sentences.length; index++) {
                String tokens[] = tokenizer.tokenize(sentences[index]);
                Span nameSpans[] = nameFinder.find(tokens);
                for (Span span : nameSpans) {
                    // Only the first token of the span is reported as the entity text.
                    list.add("Sentence: " + index
                            + " Span: " + span.toString() + " Entity: "
                            + tokens[span.getStart()]);
                }
            }
        }
        System.out.println("Multiple Entities");
        for (String element : list) {
            System.out.println(element);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
 
Example #18
Source File: NLPNERTaggerOp.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a tagger operation backed by a new {@link NameFinderME} built from the model.
 *
 * @param model name finder model to wrap
 */
public NLPNERTaggerOp(TokenNameFinderModel model) {
  nameFinder = new NameFinderME(model);
}
 
Example #19
Source File: OpenNlpService.java    From elasticsearch-ingest-opennlp with Apache License 2.0 4 votes vote down vote up
/**
 * Rebuilds the content with every detected entity rewritten as
 * {@code [entity text](Type_entity text)}; all other tokens are copied unchanged.
 * Whitespace between tokens is restored only where the original content had a gap.
 *
 * @param content the original text
 * @param extractedEntities extraction results; the tokens of the first entry are used
 *        for the whole text
 * @return the annotated text, or {@code content} itself when no spans remain after
 *         dropping overlaps (including when the list is empty)
 */
static String createAnnotatedText(String content, List<ExtractedEntities> extractedEntities) {
    List<Span> spansList = new ArrayList<>();
    extractedEntities.stream()
            .map(ExtractedEntities::getSpans)
            .forEach(s -> spansList.addAll(Arrays.asList(s)));

    Span[] spans = NameFinderME.dropOverlappingSpans(spansList.toArray(new Span[0]));

    // shortcut if there is no enrichment to be done; this runs before the first list
    // entry is read so that an empty list returns the content instead of throwing
    if (spans.length == 0) {
        return content;
    }

    // these spans contain the real offset of each word in start/end variables!
    // the spans of the method argument contain the offset of each token, as mentioned in tokens!
    Span[] spansWithRealOffsets = SimpleTokenizer.INSTANCE.tokenizePos(content);
    String[] tokens = extractedEntities.get(0).getTokens();

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < tokens.length; i++) {
        final int idx = i;
        String token = tokens[i];

        final Optional<Span> optionalSpan = Arrays.stream(spans).filter(s -> s.getStart() == idx).findFirst();
        if (optionalSpan.isPresent()) {
            Span span = optionalSpan.get();
            int start = span.getStart();
            int end = span.getEnd();
            String type = span.getType();

            // collect the tokens covered by the span (end is exclusive)
            String[] spanTokens = new String[end - start];
            int spanPosition = 0;
            for (int tokenPosition = start ; tokenPosition < end; tokenPosition++) {
                spanTokens[spanPosition++] = tokens[tokenPosition];
            }
            String entityString = Strings.arrayToDelimitedString(spanTokens, " ");

            builder.append("[");
            builder.append(entityString);
            builder.append("](");
            builder.append(Strings.capitalize(type));
            builder.append("_");
            builder.append(entityString);
            builder.append(")");
            // continue after the last token of the span
            i = end - 1;
        } else {
            builder.append(token);
        }

        // only append a whitespace, if the offsets actually differ
        if (i < tokens.length - 1) {
            if (spansWithRealOffsets[i].getEnd() != spansWithRealOffsets[i+1].getStart()) {
                builder.append(" ");
            }
        }
    }

    return builder.toString();
}
 
Example #20
Source File: NameFinderFactory.java    From wiseowl with MIT License 4 votes vote down vote up
/**
 * Returns the array of {@link NameFinderME} instances held by this factory.
 *
 * @return the internal finders array itself (not a copy)
 */
public NameFinderME[] getNameFinders() {
  return this.finders;
}
 
Example #21
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Evaluates the name finder: splits the extracted samples into a training and a test
 * set using the data splitter, trains a model on the training set and compares its
 * predictions on the test set with the gold names.
 *
 * @param aCasses CASes from which the name samples are extracted
 * @param aDataSplitter decides for every sample whether it is used for training,
 *        testing, or not at all
 * @return the evaluation result; marked as skipped when either set has fewer than two
 *         samples
 * @throws RecommendationException declared by the overridden method
 */
@Override
public EvaluationResult evaluate(List<CAS> aCasses, DataSplitter aDataSplitter)
    throws RecommendationException
{
    List<NameSample> allSamples = extractNameSamples(aCasses);
    List<NameSample> trainingSet = new ArrayList<>();
    List<NameSample> testSet = new ArrayList<>();

    for (NameSample candidate : allSamples) {
        switch (aDataSplitter.getTargetSet(candidate)) {
        case TRAIN:
            trainingSet.add(candidate);
            break;
        case TEST:
            testSet.add(candidate);
            break;
        default:
            // Sample is assigned to neither set
            break;
        }
    }

    int trainingSetSize = trainingSet.size();
    int testSetSize = testSet.size();
    int totalSize = allSamples.size();

    // Share of the non-test samples that actually ended up in the training set
    double overallTrainingSize = totalSize - testSetSize;
    double trainRatio = 0.0;
    if (overallTrainingSize > 0) {
        trainRatio = trainingSetSize / overallTrainingSize;
    }

    boolean enoughData = trainingSetSize >= 2 && testSetSize >= 2;
    if (!enoughData) {
        String info = String.format(
                "Not enough evaluation data: training set [%s] items, test set [%s] of total [%s]",
                trainingSetSize, testSetSize, totalSize);
        LOG.info(info);

        EvaluationResult skipped = new EvaluationResult(trainingSetSize,
                testSetSize, trainRatio);
        skipped.setEvaluationSkipped(true);
        skipped.setErrorMsg(info);
        return skipped;
    }

    LOG.info("Training on [{}] items, predicting on [{}] of total [{}]", trainingSetSize,
            testSetSize, totalSize);

    // Train on the training set only
    TokenNameFinderModel model = train(trainingSet, traits.getParameters());
    NameFinderME nameFinder = new NameFinderME(model);

    // Predict on every test sample and pair the predictions with the gold labels
    List<LabelPair> labelPairs = new ArrayList<>();
    for (NameSample testSample : testSet) {
        // reset adaptive data of the feature generators when the sample asks for it
        if (testSample.isClearAdaptiveDataSet()) {
            nameFinder.clearAdaptiveData();
        }

        // Each span is one named entity; the array holds all of them for the sentence
        String[] sentence = testSample.getSentence();
        Span[] predictedNames = nameFinder.find(sentence);
        Span[] goldNames = testSample.getNames();

        List<LabelPair> sentencePairs = determineLabelsForASentence(sentence,
                predictedNames, goldNames);
        labelPairs.addAll(sentencePairs);
    }

    return labelPairs.stream().collect(EvaluationResult
            .collector(trainingSetSize, testSetSize, trainRatio, NO_NE_TAG));
}
 
Example #22
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the model stored in the context over the sentences of the CAS and adds one
 * prediction annotation per named entity found. At most
 * {@code traits.getPredictionLimit()} sentences are processed.
 *
 * @param aContext context holding the trained model under {@code KEY_MODEL}
 * @param aCas CAS to read sentences and tokens from and to add predictions to
 * @throws RecommendationException if the context contains no model
 */
@Override
public void predict(RecommenderContext aContext, CAS aCas) throws RecommendationException
{
    TokenNameFinderModel model = aContext.get(KEY_MODEL).orElseThrow(() -> 
            new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));

    NameFinderME finder = new NameFinderME(model);

    Type sentenceType = getType(aCas, Sentence.class);
    Type tokenType = getType(aCas, Token.class);
    Type predictedType = getPredictedType(aCas);

    Feature predictedFeature = getPredictedFeature(aCas);
    Feature isPredictionFeature = getIsPredictionFeature(aCas);
    Feature scoreFeature = getScoreFeature(aCas);

    int processedSentences = 0;
    for (AnnotationFS sentence : select(aCas, sentenceType)) {
        if (processedSentences >= traits.getPredictionLimit()) {
            break;
        }
        processedSentences++;

        // Covered text of every token in the sentence, in token order
        List<AnnotationFS> tokenAnnotations = selectCovered(tokenType, sentence);
        String[] tokens = new String[tokenAnnotations.size()];
        int pos = 0;
        for (AnnotationFS tokenAnnotation : tokenAnnotations) {
            tokens[pos++] = tokenAnnotation.getCoveredText();
        }

        Span[] predictions = finder.find(tokens);
        for (Span prediction : predictions) {
            String label = prediction.getType();
            if (!NameSample.DEFAULT_TYPE.equals(label)) {
                // Map the token-index span back to character offsets; end is exclusive
                AnnotationFS firstToken = tokenAnnotations.get(prediction.getStart());
                int begin = firstToken.getBegin();
                AnnotationFS lastToken = tokenAnnotations.get(prediction.getEnd() - 1);
                int end = lastToken.getEnd();

                AnnotationFS annotation = aCas.createAnnotation(predictedType, begin, end);
                annotation.setStringValue(predictedFeature, label);
                annotation.setDoubleValue(scoreFeature, prediction.getProb());
                annotation.setBooleanValue(isPredictionFeature, true);

                aCas.addFsToIndexes(annotation);
            }
        }
    }
}
 
Example #23
Source File: NERDemo.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 4 votes vote down vote up
/**
 * Demo entry point: loads the tokenizer and person models, then prints the spans and
 * first tokens of the person names found in one sentence and in a set of sentences.
 * Any failure is reported by printing the exception.
 *
 * @param args unused
 */
public static void main(String args[]){
    String sentences[] = {"Joe was the last person to see Fred. ", 
        "He saw him in Boston at McKenzie's pub at 3:00 where he " 
        + " paid $2.45 for an ale. ", 
        "Joe wanted to go to Vermont for the day to visit a cousin who " 
        + "works at IBM, but Sally and he had to look for Fred"}; 
    String sentence = "He was the last person to see Fred."; 
    // Both model files are closed automatically when the try block exits.
    try (InputStream tokenStream = new FileInputStream(new File(getResourcePath()+ "en-token.bin"));
            InputStream modelStream = new FileInputStream(new File(getResourcePath() + "en-ner-person.bin")))
    {
        TokenizerModel tokenModel = new TokenizerModel(tokenStream);
        Tokenizer tokenizer = new TokenizerME(tokenModel);
        TokenNameFinderModel entityModel = new TokenNameFinderModel(modelStream);
        NameFinderME nameFinder = new NameFinderME(entityModel);
        String tokens1[] = tokenizer.tokenize(sentence);
        Span nameSpans1[] = nameFinder.find(tokens1);
        for (int i = 0; i < nameSpans1.length; i++) { 
            System.out.println("Span: " + nameSpans1[i].toString()); 
            // Only the first token of the span is printed as the entity.
            System.out.println("Entity: " 
                + tokens1[nameSpans1[i].getStart()]); 
        } 
        
        System.out.println("---------- Multiple Sentences -----------");
        for (String sentence1 : sentences) { 
            String tokens[] = tokenizer.tokenize(sentence1); 
            Span nameSpans[] = nameFinder.find(tokens); 
            for (int i = 0; i < nameSpans.length; i++) { 
                System.out.println("Span: " + nameSpans[i].toString()); 
                System.out.println("Entity: "  
                    + tokens[nameSpans[i].getStart()]); 
            } 
            System.out.println(); 
        } 
        
    }
    catch(Exception e){
        System.out.println(e);
    }
}
 
Example #24
Source File: Discoverer.java    From DataDefender with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps the given name finder model in a {@code Model} that uses this object's tokenizer.
 *
 * @param tnf name finder model to build the {@link NameFinderME} from
 * @param modelName name given to the resulting model
 * @return the new model
 */
private Model createModelFrom(TokenNameFinderModel tnf, String modelName) {
    NameFinderME finderForModel = new NameFinderME(tnf);
    Model created = new Model(tokenizer, finderForModel, modelName);
    return created;
}
 
Example #25
Source File: Model.java    From DataDefender with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a model from its three parts.
 *
 * @param tokenizer tokenizer stored in this model
 * @param nameFinder name finder stored in this model
 * @param name name stored in this model
 */
public Model(final Tokenizer tokenizer, final NameFinderME nameFinder, final String name) {
    this.tokenizer = tokenizer;
    this.nameFinder = nameFinder;
    this.name = name;
}
 
Example #26
Source File: Model.java    From DataDefender with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the name finder held by this model.
 *
 * @return the name finder
 */
public NameFinderME getNameFinder() {
    return nameFinder;
}
 
Example #27
Source File: NETagger.java    From OpenEphyra with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Performs named entity tagging on an array of full parses of sentences.
 * Every finder is run over the tag nodes of each parse; the tags of earlier parses are
 * passed to later calls through a per-finder map from token to tag.
 * 
 * @param parses array of full parses of sentences
 */
// TODO only works with OpenNLP taggers so far
@SuppressWarnings("unchecked")
public static void tagNes(Parse[] parses) {
	// initialize prevTokenMaps (one token-to-tag map per finder, shared across parses)
	Map[] prevTokenMaps = new HashMap[finders.length];
	for (int i = 0; i < finders.length; i++)
		prevTokenMaps[i] = new HashMap();
	
	for (Parse parse : parses) {
		// get tokens
		Parse[] tokens = parse.getTagNodes();
		
		// find named entities
		String[][] finderTags = new String[finders.length][];
		for (int i = 0; i < finders.length; i++)
			finderTags[i] = finders[i].find(tokens, prevTokenMaps[i]);
		
		// update prevTokenMaps
		for (int i = 0; i < prevTokenMaps.length; i++)
			for (int j = 0; j < tokens.length; j++)
				prevTokenMaps[i].put(tokens[j], finderTags[i][j]);
		
		for (int i = 0; i < finders.length; i++) {
			// index of the token that opened the current name, or -1 when outside a name
			int start = -1;
			List<Span> names = new ArrayList<Span>(5);
			
			// determine spans of tokens that are named entities
			for (int j = 0; j < tokens.length; j++) {
				// a START or OTHER tag ends the name that is currently open, if any
				if ((finderTags[i][j].equals(NameFinderME.START) ||
					 finderTags[i][j].equals(NameFinderME.OTHER))) {
					if (start != -1) names.add(new Span(start, j - 1));
					start = -1;
				}
				if (finderTags[i][j].equals(NameFinderME.START)) start = j;
			}
			// a name still open at the end of the sentence runs to the last token
			if (start != -1) names.add(new Span(start, tokens.length - 1));
			
			// add name entity information to parse
			addNames(finderNames[i], names, tokens);
		}
	}
}