Java Code Examples for opennlp.tools.util.Span#getType()

The following examples show how to use opennlp.tools.util.Span#getType(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the label for the token at the given index with respect to a span.
 * <p>
 * The span is treated as the half-open token range {@code [start, end)}: a token
 * index inside that range receives the span's type, any other index receives
 * {@code NO_NE_TAG}.
 *
 * @param aName
 *            the span to test the token index against
 * @param aTokenIdx
 *            the index of the token to label
 * @return the span's type if the token lies inside the span, {@code NO_NE_TAG} otherwise
 */
private String determineLabel(Span aName, int aTokenIdx)
{
    // Guard clause: the token lies before the span's first token or at/after its end
    if (aTokenIdx < aName.getStart() || aTokenIdx >= aName.getEnd()) {
        return NO_NE_TAG;
    }

    return aName.getType();
}
 
Example 2
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the name finder model stored in the context over the sentences of the given CAS
 * and adds one prediction annotation per span found.
 * <p>
 * Sentences are processed in the order returned by {@code select}; processing stops once
 * the number of processed sentences reaches {@code traits.getPredictionLimit()}. Spans
 * whose type equals {@code NameSample.DEFAULT_TYPE} are not turned into annotations.
 *
 * @param aContext
 *            the recommender context expected to hold the model under {@code KEY_MODEL}
 * @param aCas
 *            the CAS to read sentences/tokens from and to add the predictions to
 * @throws RecommendationException
 *             if no model is stored in the context under {@code KEY_MODEL}
 */
@Override
public void predict(RecommenderContext aContext, CAS aCas) throws RecommendationException
{
    TokenNameFinderModel nerModel = aContext.get(KEY_MODEL).orElseThrow(() -> 
            new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));

    NameFinderME nameFinder = new NameFinderME(nerModel);

    // Resolve the CAS types and features once, up front
    Type sentenceType = getType(aCas, Sentence.class);
    Type tokenType = getType(aCas, Token.class);
    Type predictedType = getPredictedType(aCas);

    Feature labelFeature = getPredictedFeature(aCas);
    Feature predictionFlagFeature = getIsPredictionFeature(aCas);
    Feature confidenceFeature = getScoreFeature(aCas);

    int processedSentences = 0;
    for (AnnotationFS sentence : select(aCas, sentenceType)) {
        // The limit applies to the number of sentences handed to the name finder
        if (processedSentences >= traits.getPredictionLimit()) {
            break;
        }
        processedSentences++;

        List<AnnotationFS> sentenceTokens = selectCovered(tokenType, sentence);
        String[] tokenTexts = new String[sentenceTokens.size()];
        for (int t = 0; t < tokenTexts.length; t++) {
            tokenTexts[t] = sentenceTokens.get(t).getCoveredText();
        }

        for (Span found : nameFinder.find(tokenTexts)) {
            String label = found.getType();
            if (!NameSample.DEFAULT_TYPE.equals(label)) {
                // Span offsets are token indices with an exclusive end; map them back to
                // the character offsets of the first and last covered token
                AnnotationFS firstToken = sentenceTokens.get(found.getStart());
                AnnotationFS lastToken = sentenceTokens.get(found.getEnd() - 1);

                AnnotationFS suggestion = aCas.createAnnotation(predictedType,
                        firstToken.getBegin(), lastToken.getEnd());
                suggestion.setStringValue(labelFeature, label);
                suggestion.setDoubleValue(confidenceFeature, found.getProb());
                suggestion.setBooleanValue(predictionFlagFeature, true);

                aCas.addFsToIndexes(suggestion);
            }
        }
    }
}
 
Example 3
Source File: OpenNlpService.java    From elasticsearch-ingest-opennlp with Apache License 2.0 4 votes vote down vote up
/**
 * Renders {@code content} with every extracted entity wrapped in annotation markup.
 * <p>
 * Each entity span is emitted as {@code [entity text](Type_entity text)}, where the entity
 * text is the span's tokens joined by a single space and the type is the capitalized span
 * type. Tokens outside any span are emitted unchanged. A whitespace is inserted between
 * two consecutive tokens only if their character offsets in {@code content} are not
 * adjacent.
 * <p>
 * The spans of all entries in {@code extractedEntities} are merged and overlapping spans
 * are dropped; the tokens are taken from the first entry only. If there are no spans at
 * all (including the case of an empty list), {@code content} is returned as is.
 *
 * @param content           the original text the entities were extracted from
 * @param extractedEntities the extraction results; span offsets are token indices
 * @return the annotated text, or {@code content} itself if there is nothing to annotate
 */
static String createAnnotatedText(String content, List<ExtractedEntities> extractedEntities) {
    Span[] allSpans = extractedEntities.stream()
            .map(ExtractedEntities::getSpans)
            .flatMap(Arrays::stream)
            .toArray(Span[]::new);
    Span[] spans = NameFinderME.dropOverlappingSpans(allSpans);

    // shortcut if there is no enrichment to be done; this must happen before touching
    // extractedEntities.get(0), which would throw for an empty list
    if (spans.length == 0) {
        return content;
    }

    String[] tokens = extractedEntities.get(0).getTokens();

    // these spans contain the real offset of each word in start/end variables!
    // the spans of the method argument contain the offset of each token, as mentioned in tokens!
    // NOTE(review): assumes SimpleTokenizer yields exactly one span per entry of tokens,
    // i.e. the tokens were produced by the same tokenizer - confirm against the caller
    Span[] spansWithRealOffsets = SimpleTokenizer.INSTANCE.tokenizePos(content);

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < tokens.length; i++) {
        final int idx = i;

        final Optional<Span> optionalSpan = Arrays.stream(spans).filter(s -> s.getStart() == idx).findFirst();
        if (optionalSpan.isPresent()) {
            Span span = optionalSpan.get();
            int start = span.getStart();
            int end = span.getEnd();

            String[] spanTokens = new String[end - start];
            System.arraycopy(tokens, start, spanTokens, 0, spanTokens.length);
            String entityString = Strings.arrayToDelimitedString(spanTokens, " ");

            builder.append("[");
            builder.append(entityString);
            builder.append("](");
            builder.append(Strings.capitalize(span.getType()));
            builder.append("_");
            builder.append(entityString);
            builder.append(")");

            // continue after the last token of the span (end is exclusive)
            i = end - 1;
        } else {
            builder.append(tokens[i]);
        }

        // only append a whitespace, if the offsets actually differ
        if (i < tokens.length - 1) {
            if (spansWithRealOffsets[i].getEnd() != spansWithRealOffsets[i + 1].getStart()) {
                builder.append(" ");
            }
        }
    }

    return builder.toString();
}