Java Code Examples for org.apache.uima.cas.CAS#getAnnotationIndex()

The following examples show how to use org.apache.uima.cas.CAS#getAnnotationIndex(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParagraphAnnotator.java    From biomedicus with Apache License 2.0 6 votes vote down vote up
/**
 * Turns the {@code biomedicus.v2.rtf.NewParagraph} markers of the view named by
 * {@code documentName} into {@code biomedicus.v2.Paragraph} annotations.
 *
 * <p>The first paragraph begins at offset 0; every marker closes the current paragraph at the
 * marker's end offset and the next paragraph begins there. Text after the final marker does
 * not receive a paragraph annotation.
 *
 * @param aCAS the CAS from which the view named by {@code documentName} is taken
 */
@Override
public void process(CAS aCAS) {
  LOGGER.trace("Annotating rtf paragraphs.");
  CAS view = aCAS.getView(documentName);

  Type markerType = view.getTypeSystem().getType("biomedicus.v2.rtf.NewParagraph");
  Type paragraphType = view.getTypeSystem().getType("biomedicus.v2.Paragraph");

  AnnotationIndex<AnnotationFS> markers = view.getAnnotationIndex(markerType);

  // Begin offset of the paragraph that the next marker will close.
  int paragraphBegin = 0;
  for (AnnotationFS marker : markers) {
    int paragraphEnd = marker.getEnd();
    AnnotationFS paragraph = view.createAnnotation(paragraphType, paragraphBegin, paragraphEnd);
    view.addFsToIndexes(paragraph);
    paragraphBegin = paragraphEnd;
  }
}
 
Example 2
Source File: RemoveDanglingRelationsRepair.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Removes relation annotations whose source or target end point is contained in the set
 * returned by {@code getNonIndexedFSes(aCas)}.
 *
 * <p>An annotation counts as a relation when its type declares both the
 * {@code WebAnnoConst.FEAT_REL_SOURCE} and the {@code WebAnnoConst.FEAT_REL_TARGET} feature.
 * Matching relations are collected first and removed from the indexes afterwards, so the
 * annotation index is not modified while it is being iterated.
 *
 * @param aProject the project being repaired (not read by this method)
 * @param aCas the CAS whose annotation index is scanned and cleaned
 * @param aMessages receives one INFO message stating how many relations were removed; nothing
 *        is added when no relation had to be removed
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages)
{
    Set<FeatureStructure> nonIndexed = getNonIndexedFSes(aCas);

    // Insertion-ordered so removal happens in the order the relations were encountered
    Set<FeatureStructure> toDelete = new LinkedHashSet<>();

    for (AnnotationFS fs : aCas.getAnnotationIndex()) {
        Type t = fs.getType();

        Feature sourceFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_SOURCE);
        Feature targetFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_TARGET);

        // Only relations (types having both end point features) are of interest
        if (sourceFeat == null || targetFeat == null) {
            continue;
        }

        FeatureStructure source = fs.getFeatureValue(sourceFeat);
        FeatureStructure target = fs.getFeatureValue(targetFeat);

        // Does it point to deleted spans?
        if (nonIndexed.contains(source) || nonIndexed.contains(target)) {
            toDelete.add(fs);
        }
    }

    // Delete those relations that pointed to deleted spans
    if (!toDelete.isEmpty()) {
        toDelete.forEach(aCas::removeFsFromIndexes);
        // Report the number of relations actually removed, not the number of non-indexed FSes
        aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed [%d] dangling relations.",
                toDelete.size()));
    }
}
 
Example 3
Source File: TypeGroupedContentProvider.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds {@code nameAnnotationTypeNodeMap} from the CAS of {@code mInputDocument} and then
 * asks the default {@code Display} to run {@code viewer.refresh()} through {@code syncExec}.
 *
 * <p>One tree node is created for the annotation root type and for every type it properly
 * subsumes. Each node only receives the annotations whose type equals the node's type, so
 * annotations of subtypes appear under their own node.
 */
@Override
public void changed() {
	nameAnnotationTypeNodeMap.clear();

	TypeSystem typeSystem = mInputDocument.getCAS().getTypeSystem();
	Type annotationRootType = typeSystem.getType(CAS.TYPE_NAME_ANNOTATION);

	// All proper subtypes of the annotation type, plus the annotation type itself
	List<Type> types = typeSystem.getProperlySubsumedTypes(annotationRootType);
	types.add(annotationRootType);

	for (Type type : types) {
		AnnotationTypeTreeNode typeNode = new AnnotationTypeTreeNode(type);
		nameAnnotationTypeNodeMap.put(type.getName(), typeNode);

		AnnotationIndex<AnnotationFS> index = mInputDocument.getCAS().getAnnotationIndex(type);

		for (AnnotationFS annotation : index) {
			// Annotations of another (sub)type are added under that type's own node
			if (!annotation.getType().equals(type)) {
				continue;
			}
			typeNode.add(new AnnotationTreeNode(mInputDocument, annotation));
		}
	}

	Runnable refreshViewer = new Runnable() {
		@Override
		public void run() {
			viewer.refresh();
		}
	};
	Display.getDefault().syncExec(refreshViewer);
}
 
Example 4
Source File: AnnotationEditor.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the annotations from the annotation index of {@code type} that lie inside a span.
 *
 * <p>An annotation is returned when its {@code begin} is greater than or equal to the span's
 * start, its {@code end} is less than or equal to the span's end, and it passes a
 * {@code StrictTypeConstraint} built for {@code type}.
 *
 * @param cas the CAS to search
 * @param type the annotation type whose index is searched
 * @param span the span the annotations must lie within
 * @return the matching annotations, as produced by
 *         {@code DocumentUimaImpl.fsIteratorToCollection}
 */
static Collection<AnnotationFS> getAnnotation(CAS cas, Type type, Span span) {
  ConstraintFactory factory = cas.getConstraintFactory();
  Type baseAnnotationType = cas.getAnnotationType();

  // begin >= span start
  FeaturePath pathToBegin = cas.createFeaturePath();
  pathToBegin.addFeature(baseAnnotationType.getFeatureByBaseName("begin"));
  FSIntConstraint notBeforeSpanStart = factory.createIntConstraint();
  notBeforeSpanStart.geq(span.getStart());
  FSMatchConstraint beginInsideSpan = factory.embedConstraint(pathToBegin, notBeforeSpanStart);

  // end <= span end
  FeaturePath pathToEnd = cas.createFeaturePath();
  pathToEnd.addFeature(baseAnnotationType.getFeatureByBaseName("end"));
  FSIntConstraint notAfterSpanEnd = factory.createIntConstraint();
  notAfterSpanEnd.leq(span.getEnd());
  FSMatchConstraint endInsideSpan = factory.embedConstraint(pathToEnd, notAfterSpanEnd);

  FSMatchConstraint typeFilter = new StrictTypeConstraint(type);

  // (begin inside span AND end inside span) AND type filter
  FSMatchConstraint insideSpan = factory.and(beginInsideSpan, endInsideSpan);
  FSMatchConstraint insideSpanWithType = factory.and(insideSpan, typeFilter);

  FSIndex<AnnotationFS> candidates = cas.getAnnotationIndex(type);
  FSIterator<AnnotationFS> matches =
          cas.createFilteredIterator(candidates.iterator(), insideSpanWithType);

  return DocumentUimaImpl.fsIteratorToCollection(matches);
}
 
Example 5
Source File: SymbolIndexedDocument.java    From biomedicus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a symbol indexed document from the {@code ViewIndex} annotations of a view.
 *
 * <p>For every {@code edu.umn.biomedicus.rtfuima.type.ViewIndex} annotation, in index order, a
 * {@link SymbolLocation} is created from the annotation's {@code destinationName}, its distance
 * from the end of the previous annotation, its length, and a running counter. The position of
 * that location in the resulting list is additionally recorded per destination name under the
 * annotation's {@code destinationIndex}.
 *
 * @param originalDocumentView CAS view of the original document
 * @return the newly created symbol indexed document
 */
public static SymbolIndexedDocument fromView(CAS originalDocumentView) {
  Type viewIndexType = originalDocumentView.getTypeSystem()
      .getType("edu.umn.biomedicus.rtfuima.type.ViewIndex");
  Feature destinationNameFeature = viewIndexType.getFeatureByBaseName("destinationName");
  Feature destinationIndexFeature = viewIndexType.getFeatureByBaseName("destinationIndex");

  AnnotationIndex<AnnotationFS> viewIndexAI =
      originalDocumentView.getAnnotationIndex(viewIndexType);

  List<SymbolLocation> symbolLocations = new ArrayList<>();
  // destination name -> (destination index -> position in symbolLocations)
  Map<String, Map<Integer, Integer>> destinationMap = new HashMap<>();

  int index = 0;
  int lastEnd = 0;
  for (AnnotationFS viewIndex : viewIndexAI) {
    int begin = viewIndex.getBegin();
    int end = viewIndex.getEnd();
    String destinationName = viewIndex.getStringValue(destinationNameFeature);

    symbolLocations.add(
        new SymbolLocation(destinationName, begin - lastEnd, end - begin, index));
    index++;

    int destinationIndex = viewIndex.getIntValue(destinationIndexFeature);

    // Create the per-destination map on first use, then record where the location just
    // appended sits in the list
    destinationMap.computeIfAbsent(destinationName, name -> new HashMap<>())
        .put(destinationIndex, symbolLocations.size() - 1);

    lastEnd = end;
  }

  return new SymbolIndexedDocument(symbolLocations, destinationMap,
      originalDocumentView.getDocumentText());
}
 
Example 6
Source File: AnnotationInsertingWriter.java    From biomedicus with Apache License 2.0 4 votes vote down vote up
/**
 * Rewrites the original RTF document with begin/end tags around the regions covered by the
 * configured annotation types and writes the result to {@code <artifactID>.rtf} in
 * {@code outputDir}.
 *
 * <p>Every offset from {@code begin} to {@code end} (both inclusive) of each annotation of the
 * configured types is collected; contiguous runs of collected offsets are then handed to a
 * region tagger. When no offset is covered at all, the document is written without any tags.
 *
 * @param aCAS the CAS holding the views named by {@code rtfDocumentName} and
 *        {@code documentName}
 * @throws AnalysisEngineProcessException if writing the output file fails
 */
@Override
public void process(CAS aCAS) throws AnalysisEngineProcessException {
  CAS originalDocumentView = aCAS.getView(rtfDocumentName);
  SymbolIndexedDocument symbolIndexedDocument =
      SymbolIndexedDocument.fromView(originalDocumentView);

  CAS view = aCAS.getView(documentName);

  // Sorted set of every offset covered by at least one annotation of the configured types
  TreeSet<Integer> covered = new TreeSet<>();
  for (String annotationType : Objects.requireNonNull(annotationTypes)) {
    Type type = view.getTypeSystem().getType(annotationType);

    AnnotationIndex<Annotation> annotationIndex = view.getAnnotationIndex(type);

    for (Annotation annotation : annotationIndex) {
      IntStream.rangeClosed(annotation.getBegin(), annotation.getEnd()).forEach(covered::add);
    }
  }

  // Without this guard iterator.next() throws NoSuchElementException when nothing is covered
  if (!covered.isEmpty()) {
    Iterator<Integer> iterator = covered.iterator();
    int next = iterator.next();
    int last = -1;
    while (iterator.hasNext()) {
      int first = next;
      // Advance until a gap between consecutive covered offsets ends the current run
      while (iterator.hasNext()) {
        last = next;
        next = iterator.next();
        if (next - last > 1) {
          break;
        }
      }
      // NOTE(review): when the iterator runs out inside the inner loop, `last` is the
      // second-to-last offset, so the final run is tagged one offset short, and a trailing
      // run consisting of a single offset is never tagged. Left unchanged because the end
      // convention expected by RegionTaggerBuilder is not visible here - confirm.
      RegionTaggerBuilder.create()
          .withBeginTag("\\u2222221B ")
          .withEndTag("\\u2222221E ")
          .withSymbolIndexedDocument(symbolIndexedDocument)
          .withDestinationName(documentName)
          .withBegin(first)
          .withEnd(last)
          .createRegionTagger()
          .tagRegion();
    }
  }

  String rewrittenDocument = symbolIndexedDocument.getDocument();

  Artifact artifact = UimaAdapters.getArtifact(aCAS, null);

  Path file = outputDir.resolve(artifact.getArtifactID() + ".rtf");

  try (BufferedWriter bufferedWriter = Files
      .newBufferedWriter(file, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE)) {
    bufferedWriter.write(rewrittenDocument);
  } catch (IOException e) {
    throw new AnalysisEngineProcessException(e);
  }
}
 
Example 7
Source File: TableAnnotator.java    From biomedicus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates table structure annotations (rows, cells, nested rows, nested cells) in the view
 * named by {@code documentName}.
 *
 * <p>Rows are built first: for an in-table paragraph that does not start before the end of the
 * most recently created row, a {@code biomedicus.v2.Row} is created from the paragraph's begin
 * to the begin offset of the first {@code RowEnd} after it. Paragraphs that fall inside an
 * already created row are skipped so each row is annotated once. Rows are then divided into
 * cells, cells into nested rows, and nested rows into nested cells by
 * {@code TableAnnotationDivider}.
 *
 * @param aCAS the CAS holding the view named by {@code documentName}
 */
@Override
public void process(CAS aCAS) {
  LOGGER.trace("Annotating rtf tables.");
  CAS systemView = aCAS.getView(documentName);

  TypeSystem typeSystem = aCAS.getTypeSystem();
  Type intblType = typeSystem.getType("biomedicus.v2.rtf.ParagraphInTable");
  Type rowEndType = typeSystem.getType("biomedicus.v2.rtf.RowEnd");
  Type rowType = typeSystem.getType("biomedicus.v2.Row");

  ArrayList<Integer> intblBegins = new ArrayList<>();
  AnnotationIndex<AnnotationFS> intblIndex = systemView.getAnnotationIndex(intblType);
  for (AnnotationFS annotationFS : intblIndex) {
    intblBegins.add(annotationFS.getBegin());
  }

  // NOTE(review): binarySearch below relies on this list being in ascending order, i.e. on
  // the annotation index iterating in begin order - confirm.
  ArrayList<Integer> rowEnds = new ArrayList<>();
  AnnotationIndex<AnnotationFS> rowEndIndex = systemView.getAnnotationIndex(rowEndType);
  for (AnnotationFS rowEndAnnotation : rowEndIndex) {
    rowEnds.add(rowEndAnnotation.getBegin());
  }

  // End offset of the most recently created row
  int last = 0;
  for (Integer intblBegin : intblBegins) {
    if (intblBegin < last) {
      // This paragraph lies inside the row that was just created
      continue;
    }
    int insert = Collections.binarySearch(rowEnds, intblBegin);
    if (insert < 0) {
      // Not found: convert the encoded result into the insertion point, which is the index
      // of the first row end greater than the paragraph begin
      insert = -insert - 1;
      if (insert == rowEnds.size()) {
        LOGGER.warn("Rtf intbl paragraph after the last row end.");
        continue;
      }

      int end = rowEnds.get(insert);
      systemView.addFsToIndexes(systemView.createAnnotation(rowType, intblBegin, end));
      // Remember where this row ends; previously `last` was never updated, which left the
      // skip check above without effect
      last = end;
    }
  }

  Type cellEndType = typeSystem.getType("biomedicus.v2.rtf.CellEnd");
  Type cellType = typeSystem.getType("biomedicus.v2.Cell");

  Type nestRowEndType = typeSystem
      .getType("biomedicus.v2.rtf.NestRowEnd");
  Type nestedRowType = typeSystem
      .getType("biomedicus.v2.NestedRow");
  Type nestedCellEndType = typeSystem
      .getType("biomedicus.v2.rtf.NestCellEnd");
  Type nestedCellType = typeSystem
      .getType("biomedicus.v2.NestedCell");

  TableAnnotationDivider tableAnnotationDivider = TableAnnotationDivider.in(systemView);

  // rows -> cells
  tableAnnotationDivider.using(cellEndType)
      .divide(rowType)
      .into(cellType)
      .execute();

  // cells -> nested rows
  tableAnnotationDivider.using(nestRowEndType)
      .divide(cellType)
      .into(nestedRowType)
      .execute();

  // nested rows -> nested cells
  tableAnnotationDivider.using(nestedCellEndType)
      .divide(nestedRowType)
      .into(nestedCellType)
      .execute();
}
 
Example 8
Source File: XmiCasDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that serializing a CAS to XMI with a partial type system filters out the types and
 * features that the partial type system does not contain.
 *
 * <p>A CAS is loaded from {@code ExampleCas/cas.xml}, serialized to XMI using the type system
 * from {@code ExampleCas/partialTestTypeSystem.xml}, and deserialized into a second CAS that
 * has the full type system. The second CAS must contain no {@code Organization} annotations,
 * must still contain {@code Person} annotations, and the {@code mentionType} feature of a
 * {@code Person} must be null.
 *
 * @throws Exception declared for the test framework; exceptions raised in the body are passed
 *         to {@code JUnitExtension.handleException}
 */
public void testTypeSystemFiltering() throws Exception {
  try {
    // deserialize a complex CAS from XCAS
    CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);

    XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
    ContentHandler deserHandler = deser.getXCASHandler(cas);
    SAXParserFactory fact = SAXParserFactory.newInstance();
    SAXParser parser = fact.newSAXParser();
    XMLReader xmlReader = parser.getXMLReader();
    xmlReader.setContentHandler(deserHandler);

    InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"));
    try {
      xmlReader.parse(new InputSource(serCasStream));
    } finally {
      // Close the stream even when parsing fails, so the file handle is not leaked
      serCasStream.close();
    }

    // now read in a TypeSystem that's a subset of those types
    TypeSystemDescription partialTypeSystemDesc = UIMAFramework.getXMLParser()
            .parseTypeSystemDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("ExampleCas/partialTestTypeSystem.xml")));
    TypeSystem partialTypeSystem = CasCreationUtils.createCas(partialTypeSystemDesc, null, null)
            .getTypeSystem();

    // reserialize as XMI, filtering out anything that doesn't fit in the
    // partialTypeSystem
    StringWriter sw = new StringWriter();
    XMLSerializer xmlSer = new XMLSerializer(sw, false);
    XmiCasSerializer xmiSer = new XmiCasSerializer(partialTypeSystem);
    xmiSer.serialize(cas, xmlSer.getContentHandler());
    String xml = sw.getBuffer().toString();

    // deserialize into another CAS (which has the whole type system)
    CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
    XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem());
    ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2);
    xmlReader.setContentHandler(deserHandler2);
    xmlReader.parse(new InputSource(new StringReader(xml)));

    // check that types have been filtered out
    Type orgType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Organization");
    assertNotNull(orgType);
    assertTrue(cas2.getAnnotationIndex(orgType).size() == 0);
    assertTrue(cas.getAnnotationIndex(orgType).size() > 0);

    // but that some types are still there
    Type personType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
    FSIndex<AnnotationFS> personIndex = cas2.getAnnotationIndex(personType);
    assertTrue(personIndex.size() > 0);

    // check that mentionType has been filtered out (set to null)
    FeatureStructure somePlace = personIndex.iterator().get();
    Feature mentionTypeFeat = personType.getFeatureByBaseName("mentionType");
    assertNotNull(mentionTypeFeat);
    assertNull(somePlace.getStringValue(mentionTypeFeat));
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
 
Example 9
Source File: XmiCasDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a delta XMI deserialized with {@code AllowPreexistingFS.ignore} adds new feature
 * structures to the target CAS but leaves feature structures that existed before the marker
 * untouched.
 *
 * <p>cas1 is serialized in full and loaded into cas2. In cas2, after a marker is created, one
 * annotation is added, one preexisting annotation is removed from the index, and the document
 * annotation's language is set to "en". The resulting delta is deserialized back into cas1,
 * where the language must still be "x-unspecified" and the index must hold four annotations.
 *
 * @throws Exception declared for the test framework; exceptions raised in the body are passed
 *         to {@code JUnitExtension.handleException}
 */
public void testDeltaCasIgnorePreexistingFS() throws Exception {
  try {
    CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
    TypeSystem ts = cas1.getTypeSystem();
    CAS cas2 = CasCreationUtils.createCas(ts, new TypePriorities_impl(), indexes, null);

    cas1.setDocumentText("This is a test document in the initial view");
    AnnotationFS firstAnnot = cas1.createAnnotation(cas1.getAnnotationType(), 0, 4);
    cas1.getIndexRepository().addFS(firstAnnot);
    AnnotationFS secondAnnot = cas1.createAnnotation(cas1.getAnnotationType(), 5, 10);
    cas1.getIndexRepository().addFS(secondAnnot);
    // document annotation plus the two annotations added above
    assertTrue(cas1.getAnnotationIndex().size() == 3);

    // serialize cas1 completely
    XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
    String xml = this.serialize(cas1, sharedData);
    int maxOutgoingXmiId = sharedData.getMaxXmiId();

    // deserialize into cas2
    XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
    this.deserialize(xml, cas2, sharedData2, true, -1);
    CasComparer.assertEquals(cas1, cas2);

    // create a marker; the changes made after it are serialized in delta XMI format
    Marker marker = cas2.createMarker();
    FSIndex<AnnotationFS> cas2Index = cas2.getAnnotationIndex();

    // create an annotation and add it to the index
    AnnotationFS addedAnnot = cas2.createAnnotation(cas2.getAnnotationType(), 6, 8);
    cas2.getIndexRepository().addFS(addedAnnot);
    // previous annotations and the new one
    assertTrue(cas2Index.size() == 4);

    // the first entry is taken as the document annotation, the second one is removed
    Iterator<AnnotationFS> cas2Iter = cas2Index.iterator();
    AnnotationFS docAnnot = cas2Iter.next();
    AnnotationFS removedAnnot = cas2Iter.next();
    cas2.getIndexRepository().removeFS(removedAnnot);
    assertTrue(cas2.getAnnotationIndex().size() == 3);

    // modify the language feature of the preexisting document annotation
    Feature languageF2 = cas2.getDocumentAnnotation().getType()
        .getFeatureByBaseName(CAS.FEATURE_BASE_NAME_LANGUAGE);
    docAnnot.setStringValue(languageF2, "en");

    // serialize cas2 in delta format
    String deltaXml = serialize(cas2, sharedData2, marker);

    // deserialize the delta into cas1, ignoring changes to preexisting feature structures
    this.deserialize(deltaXml, cas1, sharedData, true, maxOutgoingXmiId,
        AllowPreexistingFS.ignore);

    // the language of the document annotation must be unchanged
    Feature languageF1 = cas1.getDocumentAnnotation().getType()
        .getFeatureByBaseName(CAS.FEATURE_BASE_NAME_LANGUAGE);
    String cas1Language =
        cas1.getAnnotationIndex().iterator().next().getStringValue(languageF1);
    assertTrue(cas1Language.equals("x-unspecified"));

    // the new annotation exists and the preexisting one was not deleted
    assertTrue(cas1.getAnnotationIndex().size() == 4);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}