Java Code Examples for org.apache.uima.jcas.tcas.Annotation#setBegin()

The following examples show how to use org.apache.uima.jcas.tcas.Annotation#setBegin(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: AnnotationUtilsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@code Annotation} spanning offsets 0 to 4, passes it to
 * {@code AnnotationUtils.getSingleCovered} and asserts that the returned annotation's covered
 * text is {@code "012"}.
 */
@Test
public void testGetSingleCovered() {
  final Annotation span = new Annotation(jCas);
  span.setBegin(0);
  span.setEnd(4);

  final Optional<Annotation> covered = AnnotationUtils.getSingleCovered(Annotation.class, span);
  final String coveredText = covered.get().getCoveredText();
  Assert.assertEquals("012", coveredText);
}
 
Example 2
Source File: AnnotationUtilsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@code Annotation} spanning offsets 1 to 12, passes it to
 * {@code AnnotationUtils.getSingleCovered} and asserts that the returned {@code Optional} is
 * empty.
 */
@Test
public void testGetSingleCoveredMissing() {
  final Annotation span = new Annotation(jCas);
  span.setBegin(1);
  span.setEnd(12);

  final Optional<Annotation> result = AnnotationUtils.getSingleCovered(Annotation.class, span);
  final boolean found = result.isPresent();
  Assert.assertFalse(found);
}
 
Example 3
Source File: DummyAnnotator2.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates and indexes one {@code Annotation} for every run of digits ({@code \d+}) found in the
 * document text of the given CAS.
 *
 * @param aJCas the CAS whose document text is scanned and to which annotations are added
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  final Pattern digitRun = Pattern.compile("\\d+");
  final Matcher matcher = digitRun.matcher(aJCas.getDocumentText());
  while (matcher.find()) {
    // one annotation per match, spanning exactly the matched digits
    final Annotation digits = new Annotation(aJCas);
    digits.setBegin(matcher.start());
    digits.setEnd(matcher.end());
    digits.addToIndexes();
  }
}
 
Example 4
Source File: AbbreviationsExpanderAnnotator.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * For every {@code Abbreviation} whose text reference is itself an {@code Abbreviation}, clones
 * the annotations returned by {@code getCovered} for that reference, moves each clone onto the
 * span of the referring abbreviation and adds it to the indexes.
 *
 * <p>A warning is logged whenever the text covered by a moved clone differs from the text
 * covered by the referenced abbreviation.
 *
 * @param jCas the CAS whose abbreviations are expanded
 */
public static void expandAbbreviations(JCas jCas) {
    String pmId = getHeaderDocId(jCas);

    // Remembers the getCovered() result per referenced abbreviation
    // (original note: "otherwise was very slow").
    Map<Abbreviation, List<Annotation>> cache = newHashMap();

    // Work on a copy of the selection; presumably so that indexing clones inside the loop
    // cannot disturb the iteration -- TODO confirm.
    List<Abbreviation> snapshot = newLinkedList(select(jCas, Abbreviation.class));
    for (Abbreviation abrev : snapshot) {

        Annotation reference = abrev.getTextReference();
        // instanceof is already false for null, so no separate null check is needed
        if (!(reference instanceof Abbreviation)) {
            continue;
        }
        Abbreviation aRef = (Abbreviation) reference;

        List<Annotation> covereds;
        if (cache.containsKey(aRef)) {
            covereds = cache.get(aRef);
        } else {
            covereds = getCovered(jCas, aRef, pmId);
            cache.put(aRef, covereds);
        }

        // copy them to the other abbreviation short-forms
        for (Annotation covered : covereds) {

            Annotation clone = (Annotation) covered.clone();
            clone.setBegin(abrev.getBegin());
            clone.setEnd(abrev.getEnd());
            clone.addToIndexes(jCas);

            if (!clone.getCoveredText().equals(aRef.getCoveredText())) {
                // pmId is passed as a parameter rather than concatenated into the format
                // string, so a "{}" inside the id can no longer shift the placeholders.
                LOG.warn("'{}' not matching2 '{}' in {}", new Object[] {
                        clone.getCoveredText(), aRef.getCoveredText(), pmId });
            }
        }
    }
}
 
Example 5
Source File: CASImpl.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a feature structure of the given type via {@code createFS}, casts it to
 * {@code Annotation} and sets its begin and end offsets.
 *
 * @param type  the type of the feature structure to create
 * @param begin the begin offset to set
 * @param end   the end offset to set
 * @return the newly created annotation
 */
@Override
  public Annotation createAnnotation(Type type, int begin, int end) {
    // No explicit base-CAS guard here: according to the original note it would only
    // duplicate a later check.
    final Annotation annotation = (Annotation) createFS(type);
    annotation.setBegin(begin);
    annotation.setEnd(end);
    return annotation;
  }
 
Example 6
Source File: IndexCorruptionReportingTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes an annotation spanning 0 to 10 and then changes its begin offset. If that update
 * raises a {@code UIMARuntimeException}, its message key must equal
 * {@code UIMARuntimeException.ILLEGAL_FS_FEAT_UPDATE}; the test also passes when no exception
 * is raised.
 */
public void testReport() throws Exception {
  final JCas view = cas.getJCas();
  final Annotation indexed = new Annotation(view, 0, 10);
  indexed.addToIndexes();
  try {
    indexed.setBegin(2);
  } catch (UIMARuntimeException ex) {
    final boolean isIllegalUpdate =
        ex.getMessageKey().equals(UIMARuntimeException.ILLEGAL_FS_FEAT_UPDATE);
    assertTrue(isIllegalUpdate);
  }
}
 
Example 7
Source File: AnnotationUtilsTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code WordToken} on the test CAS with the given offsets and adds it to the indexes.
 *
 * @param start begin offset of the new token
 * @param end   end offset of the new token
 */
private void addAnnotation(final int start, final int end) {
  final Annotation token = new WordToken(jCas);
  token.setBegin(start);
  token.setEnd(end);
  token.addToIndexes();
}
 
Example 8
Source File: MongoCollectionReader.java    From bluima with Apache License 2.0 4 votes vote down vote up
/**
 * Fills the given CAS from the next document of the cursor: sets the document text, indexes a
 * {@code Header} carrying the document id and title, and then recreates one annotation per
 * entry of every document field whose name matches the short name of a known mapping.
 *
 * <p>A failure while rebuilding a single annotation is logged together with the document id and
 * does not stop the processing of the remaining entries.
 *
 * @param jCas the CAS to populate
 */
@Override
public void getNext(JCas jCas) throws IOException, CollectionException {

    final DBObject doc = cur.next();

    // document text; a missing text field yields an empty document text
    final boolean hasText = doc.get(TEXT) != null;
    jCas.setDocumentText(hasText ? doc.get(TEXT).toString() : "");

    // header with the document id and a (possibly empty) title
    final Header header = new Header(jCas);
    header.setDocId(doc.get(ID).toString());
    final boolean hasTitle = doc.containsField(TITLE) && doc.get(TITLE) != null;
    header.setTitle(hasTitle ? doc.get(TITLE).toString() : "");
    header.addToIndexes();

    // all other annotations, from mappings
    for (String dbListsName : doc.keySet()) {
        for (String annotClass : ALL_MAPPINGS_KEYS) {

            final MongoFieldMapping fm = ALL_MAPPINGS.get(annotClass);
            if (!fm.shortName.equals(dbListsName)) {
                continue; // this mapping does not describe the current field
            }

            final BasicDBList dbList = (BasicDBList) doc.get(dbListsName);
            for (Object entry : dbList) {
                final BasicDBObject dbO = (BasicDBObject) entry;

                try {
                    final Annotation a = getAnnotationByClassName(jCas, annotClass);
                    a.setBegin(dbO.getInt(BEGIN));
                    a.setEnd(dbO.getInt(END));

                    // restore every feature that has a mapped field
                    for (Feature f : a.getType().getFeatures()) {
                        if (!fm.fieldMappings.containsKey(f.getShortName())) {
                            continue;
                        }
                        final String fieldKey = fm.fieldMappings.get(f.getShortName());
                        final String range = f.getRange().getShortName();
                        MongoFieldMapping.readFieldFromDb(fieldKey, range, a, f, dbO, jCas);
                    }
                    a.addToIndexes();

                } catch (Exception e) {
                    LOG.error("while processing docId " + doc.get(ID), e);
                }
            }
        }
    }
}
 
Example 9
Source File: FeatureStructureTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * This test tests V2 backwards compatibility.
 * The goal is to match what V2 did for low level cas access.
 * The area this is testing is the use of the LL int operations to change the type of an existing feature structure.
 *
 * The test goes through four type changes on the same id, each made with
 * ll_setIntValue(id, 0, typeCode):
 *   1. Token -> Annotation: same FS object is expected, begin/end/sofa kept.
 *   2. Annotation -> Token: same FS object is expected, earlier feature values still readable.
 *   3. Token -> tokenTypeType: a different FS object is expected under the same id.
 *   4. tokenTypeType -> Token: a different FS object again, with begin/end/float/feature
 *      values at 0 / 0 / 0.0 / null while the sofa is still the original one.
 * After every step the FSArray slot and the FSList head must point at the current FS.
 */
public void testLLsetType() {
  LowLevelCAS llc = cas.getLowLevelCAS();
   FSArray fsa = new FSArray(ts.getType(CAS.TYPE_NAME_FS_ARRAY), cas, 3);
   fsa.addToIndexes();  // otherwise won't be replaced later
   NonEmptyFSList  fsl = new NonEmptyFSList(ts.getType(CAS.TYPE_NAME_NON_EMPTY_FS_LIST), cas);
   fsl.addToIndexes(); // otherwise won't be replaced later
   
   Annotation token = this.cas.createFS(tokenType);
   // NOTE(review): presumably registers the FS so getFsFromId(tokId) can find it later -- confirm
   cas.setId2FSsMaybeUnconditionally(token);  
   
   // set up some refs; these must be updated if the type changes in a way to require a new FS
   fsa.set(0, token);   // set the 0th  element of a FS Array to point to the "token"
   fsl.setHead(token);  // set the head element of a FS List to point to the "token"
   int tokId = token._id();
   
   // set some feature values; some of these are copied (if there's room, etc.)
   TOP ttfv = cas.createFS(tokenTypeType);
   token.setFeatureValue(tokenTypeFeat, ttfv);
   token.setFloatValue(tokenFloatFeat, 1.1f);
   assertEquals(1.1f, token.getFloatValue(tokenFloatFeat));
   token.setDoubleValue(tokenDoubleFeat, 1.7d);
   assertEquals(1.7d, token.getDoubleValue(tokenDoubleFeat));
   token.setBegin(3);
   token.setEnd(5);
   
   // baseline before any type change: sofa is set and both references point at the token
   Sofa sofa = (Sofa) token.getSofa();
   assertTrue(sofa != null);
   assertTrue(fsa.get(0) == token);
   assertTrue(fsl.getHead() == token);
   
   // change the type to just Annotation
   // because this is a supertype, it should not create a new FS
   
   // NOTE(review): the 0 argument presumably addresses the type slot of the FS -- confirm
   llc.ll_setIntValue(tokId, 0, TypeSystemConstants.annotTypeCode);
   Annotation fs = cas.getFsFromId(tokId);
   assertTrue(fs == token);
   assertTrue(fs._id() == token._id());
   assertEquals(ts.annotType, fs._getTypeImpl());
   assertEquals(fs.getBegin(), 3);
   assertEquals(fs.getEnd(), 5);
   assertEquals(sofa, fs.getSofa());
   assertTrue(fsa.get(0) == fs);
   assertTrue(fsl.getHead() == fs);
   
   // Change Annotation back to Token type    
   
   llc.ll_setIntValue(tokId, 0, tokenType.getCode());
   token = cas.getFsFromId(tokId);
   // still the very same object; the values set before the first change are still readable
   assertTrue(fs == token);
   assertTrue(fs._id() == token._id());
   assertEquals(fs.getBegin(), 3);
   assertEquals(fs.getEnd(), 5);
   assertEquals(sofa, fs.getSofa());
   assertEquals(1.1f, token.getFloatValue(tokenFloatFeat));
   assertEquals(ttfv, token.getFeatureValue(tokenTypeFeat));
   assertTrue(fsa.get(0) == token);
   assertTrue(fsl.getHead() == token);
   
   // change type where the type forces a copy
   // token -> token_type_type
   //  These types are completely orthogonal, one doesn't subsume the other
   
   llc.ll_setIntValue(tokId,  0,  tokenTypeType.getCode());
   TOP ttt = cas.getFsFromId(tokId);
   // a different object under the same id; the array slot and list head now point at it
   assertTrue(ttt != token);
   assertTrue(ttt._id() == tokId);
   assertEquals(ttt._getTypeImpl(), tokenTypeType);
   assertTrue(fsa.get(0) == ttt);
   assertTrue(fsl.getHead() == ttt);
   
   
   // and back to Token: again a different object, same id
   llc.ll_setIntValue(tokId,  0,  tokenType.getCode());
   token = cas.getFsFromId(tokId);
   assertTrue(ttt != token);
   assertTrue(ttt._id() == token._id());
   // the previously set values are gone (0 / 0 / 0.0 / null), but the sofa is unchanged
   assertEquals(token.getBegin(), 0);
   assertEquals(token.getEnd(), 0);
   assertEquals(sofa, token.getSofa());
   assertEquals(0.0f, token.getFloatValue(tokenFloatFeat));
   assertEquals(null, token.getFeatureValue(tokenTypeFeat));
   assertTrue(fsa.get(0) == token);
   assertTrue(fsl.getHead() == token);

}
 
Example 10
Source File: SimpleTextMerger.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Appends the document text of the incoming CAS to the merge buffer and copies the configured
 * annotation types into the merged CAS, shifting each copy's begin and end by the length the
 * buffer had before this document was appended.
 *
 * <p>When the incoming CAS's {@code SourceDocumentInformation} is flagged as the last segment,
 * the buffered text is set as the merged CAS's document text, a new
 * {@code SourceDocumentInformation} is added to the merged CAS, the buffer is reset and the
 * merged CAS is flagged as ready for output.
 *
 * @param aJCas the incoming segment CAS
 * @throws AnalysisEngineProcessException if the incoming CAS has no
 *         {@code SourceDocumentInformation} annotation
 */
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  // procure a new CAS if we don't have one already
  if (mMergedCas == null) {
    mMergedCas = getEmptyJCas();
  }

  // append document text
  String docText = aJCas.getDocumentText();
  int prevDocLen = mDocBuf.length();
  mDocBuf.append(docText);

  // copy specified annotation types
  CasCopier copier = new CasCopier(aJCas.getCas(), mMergedCas.getCas());
  // needed in case one annotation is in two indexes (could happen if specified annotation
  // types overlap)
  Set<FeatureStructure> copiedIndexedFs = new HashSet<FeatureStructure>();
  for (String typeName : mAnnotationTypesToCopy) {
    // NOTE(review): the type is looked up in the merged CAS's type system but used to query
    // the incoming CAS's index -- presumably both share one type system; confirm.
    Type type = mMergedCas.getTypeSystem().getType(typeName);
    Iterator<?> iter = aJCas.getCas().getAnnotationIndex(type).iterator();
    while (iter.hasNext()) {
      FeatureStructure fs = (FeatureStructure) iter.next();
      // add() returns false when this FS was already copied via another type's index
      if (!copiedIndexedFs.add(fs)) {
        continue;
      }
      Annotation copyOfFs = (Annotation) copier.copyFs(fs);
      // update begin and end
      copyOfFs.setBegin(copyOfFs.getBegin() + prevDocLen);
      copyOfFs.setEnd(copyOfFs.getEnd() + prevDocLen);
      mMergedCas.addFsToIndexes(copyOfFs);
    }
  }

  // get the SourceDocumentInformation FS, which indicates the sourceURI of the document
  // and whether the incoming CAS is the last segment
  Iterator<?> it = aJCas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
  if (!it.hasNext()) {
    throw new AnalysisEngineProcessException(MESSAGE_DIGEST, MISSING_SOURCE_DOCUMENT_INFO,
            new Object[0]);
  }
  SourceDocumentInformation sourceDocInfo = (SourceDocumentInformation) it.next();
  if (sourceDocInfo.getLastSegment()) {
    // time to produce an output CAS
    // set the document text
    mMergedCas.setDocumentText(mDocBuf.toString());

    // add source document info to destination CAS
    SourceDocumentInformation destSDI = new SourceDocumentInformation(mMergedCas);
    destSDI.setUri(sourceDocInfo.getUri());
    destSDI.setOffsetInSource(0);
    destSDI.setLastSegment(true);
    destSDI.addToIndexes();

    mDocBuf = new StringBuffer();
    mReadyToOutput = true;
  }
}