Java Code Examples for org.apache.uima.cas.text.AnnotationFS#getFeatureValueAsString()

The following examples show how to use org.apache.uima.cas.text.AnnotationFS#getFeatureValueAsString(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: DataMajorityNerRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Collects one {@code Annotation} for every annotation of the configured layer that carries
 * a non-empty value in the configured feature.
 *
 * @param aCasses
 *            the CASes to scan
 * @return the collected label/begin/end triples, in iteration order
 */
private List<Annotation> extractAnnotations(List<CAS> aCasses)
{
    List<Annotation> collected = new ArrayList<>();

    for (CAS currentCas : aCasses) {
        // Type and feature are resolved per CAS, exactly once for each of them
        Type layerType = CasUtil.getType(currentCas, layerName);
        Feature labelFeature = layerType.getFeatureByBaseName(featureName);

        for (AnnotationFS candidate : CasUtil.select(currentCas, layerType)) {
            String value = candidate.getFeatureValueAsString(labelFeature);
            if (!isNotEmpty(value)) {
                // Unlabeled annotations contribute nothing
                continue;
            }

            int from = candidate.getBegin();
            int to = candidate.getEnd();
            collected.add(new Annotation(value, from, to));
        }
    }

    return collected;
}
 
Example 2
Source File: StringMatchingRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one {@code Sample} per sentence of every given CAS. Each sample carries the document
 * number, the document text, the tokens covered by the sentence and a {@code Span} for every
 * annotation of the given layer inside the sentence whose feature value is non-empty.
 *
 * @param aCasses
 *            the CASes to extract from; the position in this list becomes the document number
 * @param aLayerName
 *            name of the annotation type to read
 * @param aFeatureName
 *            base name of the feature holding the label
 * @return the extracted samples, in CAS and sentence iteration order
 */
private List<Sample> extractData(List<CAS> aCasses, String aLayerName, String aFeatureName)
{
    long start = System.currentTimeMillis();

    List<Sample> data = new ArrayList<>();

    int docNo = 0;
    for (CAS cas : aCasses) {
        Type sentenceType = getType(cas, Sentence.class);
        Type tokenType = getType(cas, Token.class);
        Type annotationType = getType(cas, aLayerName);
        Feature predictedFeature = annotationType.getFeatureByBaseName(aFeatureName);

        for (AnnotationFS sentence : select(cas, sentenceType)) {
            List<Span> spans = new ArrayList<>();

            for (AnnotationFS annotation : selectCovered(annotationType, sentence)) {
                String label = annotation.getFeatureValueAsString(predictedFeature);
                if (isNotEmpty(label)) {
                    // Reuse the label read above instead of fetching the feature value again.
                    // NOTE(review): -1.0 is presumably a "no score" placeholder - confirm
                    // against the Span constructor.
                    spans.add(new Span(annotation.getBegin(), annotation.getEnd(),
                            annotation.getCoveredText(), label, -1.0));
                }
            }

            Collection<AnnotationFS> tokens = selectCovered(tokenType, sentence);

            data.add(new Sample(docNo, cas.getDocumentText(), tokens, spans));
        }

        docNo++;
    }

    log.trace("Extracting data took {}ms", System.currentTimeMillis() - start);

    return data;
}
 
Example 3
Source File: OpenNlpDoccatRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Creates document-categorization samples: for every annotation of the configured layer that
 * lies inside a sentence, the texts of that sentence's tokens are paired with the annotation's
 * feature value ({@code NO_CATEGORY} if the value is {@code null}).
 * <p>
 * Extraction stops across all CASes as soon as the number of samples reaches the training set
 * size limit taken from the traits.
 *
 * @param aCasses
 *            the CASes to extract from
 * @return the collected samples
 */
private List<DocumentSample> extractSamples(List<CAS> aCasses)
{
    List<DocumentSample> samples = new ArrayList<>();
    casses: for (CAS cas : aCasses) {
        Type sentenceType = getType(cas, Sentence.class);
        Type tokenType = getType(cas, Token.class);

        Map<AnnotationFS, List<AnnotationFS>> tokensBySentence = indexCovered(cas, sentenceType,
                tokenType);
        for (Entry<AnnotationFS, List<AnnotationFS>> entry : tokensBySentence.entrySet()) {
            AnnotationFS sentence = entry.getKey();

            // Materialize the covered text of every token of this sentence
            List<AnnotationFS> sentenceTokens = entry.getValue();
            String[] tokenTexts = new String[sentenceTokens.size()];
            int pos = 0;
            for (AnnotationFS token : sentenceTokens) {
                tokenTexts[pos++] = token.getCoveredText();
            }

            Type annotationType = getType(cas, layerName);
            Feature feature = annotationType.getFeatureByBaseName(featureName);

            for (AnnotationFS annotation : selectCovered(annotationType, sentence)) {
                // The limit is checked before each annotation is turned into a sample
                if (samples.size() >= traits.getTrainingSetSizeLimit()) {
                    break casses;
                }

                String category = annotation.getFeatureValueAsString(feature);
                if (category == null) {
                    category = NO_CATEGORY;
                }

                DocumentSample sample = new DocumentSample(category, tokenTexts);
                if (sample.getCategory() == null) {
                    continue;
                }
                samples.add(sample);
            }
        }
    }

    return samples;
}
 
Example 4
Source File: AutomationUtil.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether two annotations agree on every comparable feature of the given type.
 * <p>
 * The begin and end features are ignored, as are non-primitive features whose value on
 * {@code aMFs} is a {@code SofaFS}. A feature whose string value is {@code null} on either
 * annotation is not compared.
 *
 * @param aType
 *            the type whose features are compared
 * @param aMFs
 *            the first annotation
 * @param aFs
 *            the second annotation
 * @return {@code false} as soon as one compared feature differs, {@code true} otherwise
 */
private static boolean isSamAnno(Type aType, AnnotationFS aMFs, AnnotationFS aFs)
{
    for (Feature f : aType.getFeatures()) {
        String fullName = f.getName();
        // anywhere is ok: the position of the annotations does not take part in the comparison
        if (fullName.equals(CAS.FEATURE_FULL_NAME_BEGIN)
                || fullName.equals(CAS.FEATURE_FULL_NAME_END)) {
            continue;
        }
        if (!f.getRange().isPrimitive() && aMFs.getFeatureValue(f) instanceof SofaFS) {
            continue;
        }

        // Read each side once instead of once per check; a value missing on either side
        // means the feature is skipped
        String expected = aMFs.getFeatureValueAsString(f);
        if (expected == null) {
            continue;
        }
        String actual = aFs.getFeatureValueAsString(f);
        if (actual == null) {
            continue;
        }

        if (!expected.equals(actual)) {
            return false;
        }
    }
    return true;
}
 
Example 5
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the annotations of the configured layer that are covered by the given sentence into
 * spans expressed in token indices (begin inclusive, end exclusive).
 * <p>
 * Annotations are skipped if they do not begin at a token begin and end at a token end, or if
 * they begin before the end of a span that was already accepted. Annotations with a blank
 * label are not turned into spans.
 *
 * @param aCas
 *            the CAS used to resolve the annotation type
 * @param aSentence
 *            the sentence whose covered annotations are converted
 * @param aTokens
 *            the tokens of the sentence; their iteration order defines the token indices
 * @return the extracted spans, possibly empty
 */
private Span[] extractAnnotatedSpans(CAS aCas, AnnotationFS aSentence,
                                     Collection<AnnotationFS> aTokens) {
    // Convert character offsets to token indices. Begin and end offsets are indexed
    // separately: with a single map, a token starting exactly where the previous token ends
    // would overwrite the previous token's end entry, and an annotation ending at that offset
    // would then be resolved to the wrong (following) token.
    Int2ObjectMap<AnnotationFS> idxTokenBeginOffset = new Int2ObjectOpenHashMap<>();
    Int2ObjectMap<AnnotationFS> idxTokenEndOffset = new Int2ObjectOpenHashMap<>();
    Object2IntMap<AnnotationFS> idxToken = new Object2IntOpenHashMap<>();
    int idx = 0;
    for (AnnotationFS t : aTokens) {
        idxTokenBeginOffset.put(t.getBegin(), t);
        idxTokenEndOffset.put(t.getEnd(), t);
        idxToken.put(t, idx);
        idx++;
    }

    // Create spans from target annotations
    Type annotationType = getType(aCas, layerName);
    Feature feature = annotationType.getFeatureByBaseName(featureName);
    List<AnnotationFS> annotations = selectCovered(annotationType, aSentence);
    List<Span> result = new ArrayList<>();

    int highestEndTokenPositionObserved = 0;
    for (AnnotationFS annotation : annotations) {
        String label = annotation.getFeatureValueAsString(feature);

        AnnotationFS beginToken = idxTokenBeginOffset.get(annotation.getBegin());
        AnnotationFS endToken = idxTokenEndOffset.get(annotation.getEnd());
        if (beginToken == null || endToken == null) {
            LOG.warn("Skipping annotation not starting/ending at token boundaries: [{}-{}, {}]",
                    annotation.getBegin(), annotation.getEnd(), label);
            continue;
        }

        int begin = idxToken.get(beginToken);
        int end = idxToken.get(endToken);

        // If the begin offset of the current annotation is lower than the highest offset so far
        // observed, then it is overlapping with some annotation that we have seen before.
        // Because OpenNLP NER does not support overlapping annotations, we skip it.
        if (begin < highestEndTokenPositionObserved) {
            LOG.debug("Skipping overlapping annotation: [{}-{}, {}]", begin, end + 1, label);
            continue;
        }

        if (isNotBlank(label)) {
            // end is the index of the last covered token, so the exclusive end is end + 1
            result.add(new Span(begin, end + 1, label));
            highestEndTokenPositionObserved = end + 1;
        }
    }
    return result.toArray(new Span[0]);
}
 
Example 6
Source File: DL4JSequenceRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Produces one label per token by walking tokens and label annotations in parallel.
 * <p>
 * A token that lies completely inside the current label annotation receives that annotation's
 * feature value ({@code NO_LABEL} if the value is empty); every other token receives
 * {@code NO_LABEL}. Label annotations may span several tokens but must begin and end at token
 * boundaries and must not overlap.
 *
 * @param aTokens
 *            the tokens to label; the CAS of the first token is used to resolve the layer type
 * @param aLabels
 *            the label annotations
 * @return the labels, one per token, in token order
 * @throws IllegalArgumentException
 *             if labels overlap or do not start/end at token boundaries
 */
public List<String> extractTokenLabels(List<AnnotationFS> aTokens,
        List<AnnotationFS> aLabels)
{
    Type layerType = getType(aTokens.get(0).getCAS(), layerName);
    Feature labelFeature = layerType.getFeatureByBaseName(featureName);

    final int tokenCount = aTokens.size();
    final int labelCount = aLabels.size();
    String[] tokenLabels = new String[tokenCount];

    int t = 0;
    int l = 0;
    boolean beginAligned = false;
    boolean endAligned = false;
    int previousLabelBound = -1;

    // Every iteration consumes exactly one token; a label is consumed once a token reaches or
    // passes its end.
    for (; t < tokenCount && l < labelCount; t++) {
        AnnotationFS token = aTokens.get(t);
        AnnotationFS label = aLabels.get(l);

        if (Math.min(label.getBegin(), label.getEnd()) < previousLabelBound) {
            throw new IllegalArgumentException("Overlapping labels are not supported!");
        }

        // Remember whether the label's begin/end has coincided with a token boundary so far
        if (label.getBegin() == token.getBegin()) {
            beginAligned = true;
        }
        if (label.getEnd() == token.getEnd()) {
            endAligned = true;
        }

        // Assign the label value only to tokens fully inside the label annotation
        boolean tokenInsideLabel = label.getBegin() <= token.getBegin()
                && token.getEnd() <= label.getEnd();
        if (tokenInsideLabel) {
            tokenLabels[t] = StringUtils
                    .defaultIfEmpty(label.getFeatureValueAsString(labelFeature), NO_LABEL);
        }
        else {
            tokenLabels[t] = NO_LABEL;
        }

        // The label extends beyond this token: keep it for the next token
        if (label.getEnd() > token.getEnd()) {
            continue;
        }

        // The label is finished: advance and verify it was aligned with token boundaries
        l++;
        if (!(beginAligned && endAligned)) {
            throw new IllegalArgumentException("Labels must start/end at token boundaries!");
        }
        beginAligned = false;
        endAligned = false;
        previousLabelBound = Math.max(label.getBegin(), label.getEnd());
    }

    // Tokens ran out while labels are still pending
    if (l < labelCount) {
        throw new IllegalArgumentException("Overlapping labels are not supported!");
    }

    // Labels ran out first: all remaining tokens are unlabeled
    for (; t < tokenCount; t++) {
        tokenLabels[t] = NO_LABEL;
    }

    return asList(tokenLabels);
}
 
Example 7
Source File: WebannoTsv2Writer.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the value of the given feature for every annotation of the given type into the
 * per-token map, keyed by the low-level reference of each covered token.
 * <p>
 * If the feature value is {@code null}, the type name followed by {@code "_"} is used. For
 * types contained in {@code multipleSpans}, the value is prefixed with {@code "B-"} for the
 * first written token of an annotation and {@code "I-"} for the following ones. If a token
 * already has an entry, the new value is appended after a {@code "|"}.
 *
 * @param aCas
 *            the CAS to read annotations from
 * @param aTokenAnnoMap
 *            the map from token reference to column value that is filled
 * @param aType
 *            the annotation type to export
 * @param aFeature
 *            the feature whose value is exported
 */
private void setTokenAnnos(CAS aCas, Map<Integer, String> aTokenAnnoMap, Type aType,
        Feature aFeature)
{
    LowLevelCAS lowLevelCas = aCas.getLowLevelCAS();
    for (AnnotationFS span : CasUtil.select(aCas, aType)) {
        boolean atSpanStart = true;
        // Set once a token of this annotation already had an entry; from then on, empty
        // tokens of a multi-span type get the "O-_" placeholder in front of their value
        boolean sawOccupiedToken = false;

        for (Token token : selectCovered(Token.class, span)) {
            if (span.getBegin() > token.getBegin() || span.getEnd() < token.getEnd()) {
                continue;
            }

            String value = span.getFeatureValueAsString(aFeature);
            if (value == null) {
                value = aType.getName() + "_";
            }

            int tokenRef = lowLevelCas.ll_getFSRef(token);
            String existing = aTokenAnnoMap.get(tokenRef);
            boolean multiSpan = multipleSpans.contains(aType.getName());

            // Multi-span types carry a B-/I- position prefix; the first written token gets B-
            String cell = value;
            if (multiSpan) {
                cell = (atSpanStart ? "B-" : "I-") + value;
                atSpanStart = false;
            }

            if (existing != null) {
                aTokenAnnoMap.put(tokenRef, existing + "|" + cell);
                sawOccupiedToken = true;
            }
            else if (sawOccupiedToken && multiSpan) {
                aTokenAnnoMap.put(tokenRef, "O-_|" + cell);
            }
            else {
                aTokenAnnoMap.put(tokenRef, cell);
            }
        }
    }
}
 
Example 8
Source File: WebannoTsv3Writer.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Collects the column values of one chain link and appends them to the rows recorded for the
 * given unit.
 * <p>
 * The features of the type are processed sorted by short name; the sofa, begin and end features
 * and the features named {@code GOVERNOR}, {@code DEPENDENT}, {@code FIRST} and {@code NEXT}
 * are skipped. A {@code null} value is written as {@code "*"}. The {@code REF_REL} feature gets
 * the suffix {@code "->chainNo-linkNo"}, every other feature the suffix {@code "[chainNo]"}.
 * The unit is also marked as ambiguous for this type.
 * <p>
 * NOTE(review): this method assumes {@code featurePerLayer} already holds an entry for the type
 * name; otherwise the lookup inside the loop fails - confirm against the caller.
 *
 * @param aAnnotationsPertype
 *            rows per unit; the new row is appended to the list of {@code aUnit}
 * @param aType
 *            the chain link type
 * @param aFs
 *            the chain link annotation
 * @param aUnit
 *            the unit the annotation is written for
 * @param aLinkNo
 *            the number of the link within its chain
 * @param achainNo
 *            the number of the chain
 * @param aMultiUnit
 *            not used by this method
 * @param aFirst
 *            not used by this method
 */
private void setChainAnnoPerFeature(Map<AnnotationUnit, List<List<String>>> aAnnotationsPertype,
        Type aType, AnnotationFS aFs, AnnotationUnit aUnit, int aLinkNo, int achainNo,
        boolean aMultiUnit, boolean aFirst)
{
    List<String> annoPerFeatures = new ArrayList<>();
    List<Feature> features = aType.getFeatures();
    Collections.sort(features, (a, b) ->
            StringUtils.compare(a.getShortName(), b.getShortName()));
    for (Feature feature : features) {
        if (feature.getName().equals(CAS.FEATURE_FULL_NAME_SOFA)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_BEGIN)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_END)
                || feature.getShortName().equals(GOVERNOR)
                || feature.getShortName().equals(DEPENDENT)
                || feature.getShortName().equals(FIRST)
                || feature.getShortName().equals(NEXT)) {
            continue;
        }
        String annotation = aFs.getFeatureValueAsString(feature);

        if (annotation == null) {
            annotation = "*";
        }
        else {
            annotation = replaceEscapeChars(annotation);
        }

        if (feature.getShortName().equals(REF_REL)) {
            annotation = annotation + "->" + achainNo + "-" + aLinkNo;
        }
        else {
            // Single-unit and multi-unit links are written identically
            annotation = annotation + "[" + achainNo + "]";
        }
        featurePerLayer.get(aType.getName()).add(feature.getShortName());

        annoPerFeatures.add(annotation);
    }
    aAnnotationsPertype.putIfAbsent(aUnit, new ArrayList<>());
    ambigUnits.putIfAbsent(aType.getName(), new HashMap<>());
    ambigUnits.get(aType.getName()).put(aUnit, true); // coref are always ambig

    // A type without any exported feature still gets a placeholder column
    if (annoPerFeatures.isEmpty()) {
        annoPerFeatures.add("*" + "[" + achainNo + "]");
    }
    aAnnotationsPertype.get(aUnit).add(annoPerFeatures);
}
 
Example 9
Source File: WebannoTsv3Writer.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Collects the column values of one relation annotation and appends them as a row to the rows
 * recorded for the dependent unit.
 * <p>
 * The features of the type are processed sorted by short name; the sofa, begin and end features
 * and the features named {@code GOVERNOR}, {@code DEPENDENT}, {@code FIRST} and {@code NEXT}
 * are skipped. A {@code null} value is written as {@code "*"}. The last column is the line
 * number of the governor unit, followed by {@code "[govRef_depRef]"} if either reference is
 * greater than zero.
 *
 * @param annotationsPertype
 *            rows per unit; the new row is appended to the list of {@code depUnit}
 * @param type
 *            the relation type
 * @param fs
 *            the relation annotation
 * @param depUnit
 *            the unit of the dependent
 * @param govUnit
 *            the unit of the governor
 * @param aGovRef
 *            the reference number of the governor
 * @param aDepRef
 *            the reference number of the dependent
 * @param aDepType
 *            the type whose name is recorded (prefixed with {@code BT}) as the header of the
 *            governor column
 */
private void setRelationAnnoPerFeature(
        Map<AnnotationUnit, List<List<String>>> annotationsPertype, Type type, AnnotationFS fs,
        AnnotationUnit depUnit, AnnotationUnit govUnit, int aGovRef, int aDepRef, Type aDepType)
{
    List<String> annoPerFeatures = new ArrayList<>();
    featurePerLayer.putIfAbsent(type.getName(), new LinkedHashSet<>());
    List<Feature> features = type.getFeatures();
    Collections.sort(features, (a, b) ->
            StringUtils.compare(a.getShortName(), b.getShortName()));
    for (Feature feature : features) {
        if (feature.getName().equals(CAS.FEATURE_FULL_NAME_SOFA)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_BEGIN)
                || feature.getName().equals(CAS.FEATURE_FULL_NAME_END)
                || feature.getShortName().equals(GOVERNOR)
                || feature.getShortName().equals(DEPENDENT)
                || feature.getShortName().equals(FIRST)
                || feature.getShortName().equals(NEXT)) {
            continue;
        }
        // NOTE(review): the returned reference id is not written to the column anymore, but
        // the call is kept because getRefId may update internal state - confirm before
        // removing it.
        getRefId(type, fs, depUnit);
        String annotation = fs.getFeatureValueAsString(feature);
        if (annotation == null) {
            annotation = "*";
        }
        else {
            annotation = replaceEscapeChars(annotation);
        }
        annoPerFeatures.add(annotation);
        featurePerLayer.get(type.getName()).add(feature.getShortName());
    }
    // add the governor and dependent unit addresses (separated by _)
    String govRef = unitsLineNumber.get(govUnit)
            + ((aDepRef > 0 || aGovRef > 0) ? "[" + aGovRef + "_" + aDepRef + "]" : "");
    annoPerFeatures.add(govRef);
    featurePerLayer.get(type.getName()).add(BT + aDepType.getName());
    // the column for the dependent unit address; the row always contains at least the
    // governor reference added above, so no empty-row placeholder is needed
    annotationsPertype.putIfAbsent(depUnit, new ArrayList<>());
    annotationsPertype.get(depUnit).add(annoPerFeatures);
}