Java Code Examples for org.apache.uima.jcas.JCas

The following examples show how to use org.apache.uima.jcas.JCas. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: bluima   Source File: WhiteTextCollectionReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the next article into the given CAS.
 *
 * <p>The article title and abstract are written into one text buffer, separated by a single
 * space, and that buffer becomes the document text. A {@code Header} carrying the article's
 * PMID is added to the indexes.
 *
 * @param jcas the CAS to populate
 * @throws IOException declared by the reader contract
 * @throws CollectionException declared by the reader contract
 */
public void getNext(JCas jcas) throws IOException, CollectionException {
    Element article = articleIt.next();
    String pmid = article.getChildText("PMID");
    LOG.trace("processing pmId {}", pmid);
    currentNrDocs++;

    StringBuilder text = new StringBuilder();

    // The value returned for the title is carried forward as the start position for the
    // abstract (after accounting for the separator below).
    int titleEnd = addAnnotations(jcas, article.getChild("ArticleTitle").getContent(), text, 0);

    // One separator character between title and abstract
    text.append(" ");
    int abstractStart = titleEnd + 1;

    addAnnotations(jcas, article.getChild("AbstractText").getContent(), text, abstractStart);

    jcas.setDocumentText(text.toString());

    Header header = new Header(jcas);
    header.setDocId(pmid);
    header.addToIndexes();
}
 
Example 2
Source Project: uima-uimafit   Source File: ExternalResourceFactoryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the injected resource array: it must be non-null, hold exactly two distinct
 * {@code ResourceWithAssert} instances. Both are recorded in {@code resources}, their hash
 * codes are printed, and each resource's own assertions are run.
 *
 * @param aJCas the CAS being processed (not read by this method)
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  assertNotNull("Resource array is null", resourceArray);
  assertEquals(2, resourceArray.length);

  boolean firstHasExpectedType = resourceArray[0] instanceof ResourceWithAssert;
  assertTrue("Resource array element 0 is not a DummyResource", firstHasExpectedType);
  boolean secondHasExpectedType = resourceArray[1] instanceof ResourceWithAssert;
  assertTrue("Resource array element 1 is not a DummyResource", secondHasExpectedType);

  // The two slots must hold different object instances
  assertTrue(resourceArray[0] != resourceArray[1]);

  for (int slot = 0; slot < 2; slot++) {
    resources.add(resourceArray[slot]);
  }

  System.out.printf("Element object 0: %d%n", resourceArray[0].hashCode());
  System.out.printf("Element object 1: %d%n", resourceArray[1].hashCode());

  for (int slot = 0; slot < resourceArray.length; slot++) {
    resourceArray[slot].doAsserts();
  }
}
 
Example 3
Source Project: bluima   Source File: FiltersTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * A {@code Keep} annotation enclosing a {@code Measure} must survive the filter run, and its
 * normalized text must be the measure mask followed by the unit.
 */
@Test
public void testNormalizeMeasures() throws Exception {
    JCas cas = getTestCas("ab12 mm");

    Measure measure = createAnnot(cas, Measure.class, 2, 6);
    measure.setUnit("mm");
    measure.setValue(12f);

    Keep wrapper = createAnnot(cas, Keep.class, 2, 6);
    wrapper.setEnclosedAnnot(measure);

    // The Keep annotation is expected NOT to be filtered out by this run
    run(cas, "tokenFrequencyFile", "stopwords_empty", 0, 5000);

    for (Keep kept : select(cas, Keep.class)) {
        Prin.t(kept);
    }
    assertTrue(exists(cas, Keep.class));

    Keep firstKept = select(cas, Keep.class).iterator().next();
    assertEquals(MeasureNormalizerAnnotator.MEASURE_MASK + "mm", firstKept.getNormalizedText());
}
 
Example 4
Source Project: uima-uimaj   Source File: BinaryCasSerDes4.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the given CAS to the given output using the supplied compression settings.
 *
 * <p>A {@code JCas} argument is unwrapped to its underlying CAS before serialization.
 *
 * @param cas CAS to serialize
 * @param out output object
 * @param trackingMark tracking mark (for delta serialization); may be null
 * @param compressLevel compression level handed to the serializer
 * @param compressStrategy compression strategy handed to the serializer
 * @return null or serialization measurements (depending on setting of doMeasurements)
 * @throws IOException declared for the underlying serialization
 * @throws CASRuntimeException if a non-null tracking mark is not valid
 */
public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark,
    CompressLevel compressLevel, CompressStrat compressStrategy) throws IOException {
  SerializationMeasures measurements = null;
  if (doMeasurements) {
    measurements = new SerializationMeasures();
  }

  // Unwrap a JCas so the cast below always sees the underlying CAS object
  AbstractCas baseCas = cas;
  if (cas instanceof JCas) {
    baseCas = ((JCas) cas).getCas();
  }
  CASImpl casImpl = (CASImpl) baseCas;

  boolean markerUnusable = trackingMark != null && !trackingMark.isValid();
  if (markerUnusable) {
    throw new CASRuntimeException(CASRuntimeException.INVALID_MARKER, "Invalid Marker.");
  }

  Serializer serializer = new Serializer(casImpl, makeDataOutputStream(out),
      (MarkerImpl) trackingMark, measurements, compressLevel, compressStrategy, false);
  serializer.serialize();

  return measurements;
}
 
Example 5
Source Project: webanno   Source File: WebannoTsv3Reader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Appends one sentence line to the covered-text buffer and indexes a {@code Sentence}
 * annotation with the given begin and end.
 *
 * @param aJCas the CAS the sentence is created in
 * @param aLine the sentence text to append
 * @param aBegin begin offset of the sentence
 * @param aEnd end offset of the sentence
 * @param aPrevEnd end offset of the previous sentence
 */
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd)
{
    // When this sentence starts exactly where the previous one ended, the line break that was
    // appended after the previous sentence has to go again - otherwise offsets drift on a
    // round-trip.
    if (aPrevEnd == aBegin && coveredText.length() > 0) {
        int lastIndex = coveredText.length() - 1;
        if (coveredText.charAt(lastIndex) == '\n') {
            coveredText.deleteCharAt(lastIndex);
        }
    }

    // One space per offset in the gap between the sentences; the loop body never runs when
    // there is no gap larger than a single character.
    for (int offset = aPrevEnd + 1; offset < aBegin; offset++) {
        coveredText.append(" ");
    }
    coveredText.append(aLine).append(LF);

    new Sentence(aJCas, aBegin, aEnd).addToIndexes();
}
 
Example 6
Source Project: bluima   Source File: MongoTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs three range queries on {@code pmid} against the Mongo reader and checks how many
 * documents each one yields.
 */
@Test
public void testReadWithBetweenQuery() throws Exception {
    // Each query is paired with the number of documents it is expected to return
    String[] queries = {
            "{pmid: { $gt: 12, $lt: 19 }}",
            "{pmid: { $gt: 18, $lt: 19 }}",
            "{pmid: { $gt: 8, $lt: 11 }}" };
    int[] expectedSizes = { 1, 0, 0 };

    for (int k = 0; k < queries.length; k++) {
        List<JCas> docs = asList(createReader(MongoCollectionReader.class,
                BlueUima.PARAM_DB_CONNECTION, conn, BlueUima.PARAM_QUERY, queries[k]));
        assertEquals(expectedSizes[k], docs.size());
    }
}
 
Example 7
Source Project: bluima   Source File: SkipSomePosAnnotator2.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * For every token whose part-of-speech is in {@code SKIP_POS}, removes from the indexes all
 * covered annotations that have the same begin/end as the token and whose class is listed in
 * {@code BIO_ANNOTATIONS}.
 *
 * @param jCas the CAS to process
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	for (Token token : select(jCas, Token.class)) {
		// Tokens without a POS value are handled under the label "UNKNOWN"
		String rawPos = token.getPos();
		String pos = (rawPos == null) ? "UNKNOWN" : rawPos;
		if (!SKIP_POS.contains(pos)) {
			continue;
		}

		for (Annotation covered : selectCovered(jCas, Annotation.class, token.getBegin(),
				token.getEnd())) {
			boolean sameSpan = BlueCasUtil.haveSameBeginEnd(token, covered);
			if (sameSpan && BIO_ANNOTATIONS.contains(covered.getClass())) {
				covered.removeFromIndexes(jCas);
			}
		}
	}
}
 
Example 8
Source Project: baleen   Source File: MetaTags.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Maps a {@code meta} element (tag name compared case-insensitively) to a {@code Metadata}
 * annotation. The key is the {@code name} attribute; the value is the {@code content}
 * attribute, or the {@code charset} attribute when content is null or empty. If both are
 * null or empty no value is set. Other elements are ignored.
 *
 * @param jCas the CAS the annotation is created in
 * @param element the element to inspect
 * @param collector receives the created annotation
 */
@Override
public void map(JCas jCas, Element element, AnnotationCollector collector) {
  if (!"meta".equalsIgnoreCase(element.tagName())) {
    return;
  }

  Metadata metadata = new Metadata(jCas);
  metadata.setKey(element.attr("name"));

  String content = element.attr("content");
  String charset = element.attr("charset");
  if (Strings.isNullOrEmpty(content)) {
    // Fall back to the charset attribute only when there is no content
    if (!Strings.isNullOrEmpty(charset)) {
      metadata.setValue(charset);
    }
  } else {
    metadata.setValue(content);
  }

  collector.add(metadata);
}
 
Example 9
Source Project: bluima   Source File: CooccurrencesEvaluationAnnotator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Compares the {@code Cooccurrence} annotations of the gold view with those of the system view
 * and feeds both collections, together with the document id, to the evaluator. Progress
 * information is emitted through {@code print}.
 *
 * @param jCas the CAS holding both views
 * @throws AnalysisEngineProcessException if one of the views cannot be obtained
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    String pmId = BlueCasUtil.getHeaderDocId(jCas);
    print("pmId " + pmId);

    JCas goldView;
    JCas systemView;
    try {
        goldView = jCas.getView(VIEW_GOLD);
        systemView = jCas.getView(VIEW_SYSTEM);
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }

    // Raw collections: they are handed to the evaluator as-is
    Collection goldAnnots = select(goldView, Cooccurrence.class);
    Collection systemAnnots = select(systemView, Cooccurrence.class);

    String sizeSummary = "comparing #gold:" + goldAnnots.size() + " #sys:" + systemAnnots.size();
    print(sizeSummary);

    print(evaluator.add(goldAnnots, systemAnnots, pmId));
}
 
Example 10
Source Project: bluima   Source File: AnnotationFilterAnnotator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Removes from the indexes every {@code Keep} annotation whose enclosed annotation has a class
 * contained in {@code annotationClassesList}.
 *
 * @param jCas the CAS to process
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    // Work on an array copy of the selection while annotations are being removed
    Collection<Keep> selected = select(jCas, Keep.class);
    Keep[] snapshot = selected.toArray(new Keep[selected.size()]);

    for (Keep keep : snapshot) {
        Class<? extends Annotation> enclosedType = keep.getEnclosedAnnot().getClass();
        if (annotationClassesList.contains(enclosedType)) {
            keep.removeFromIndexes();
        }
    }
}
 
Example 11
/**
 * Runs the enabled regex extractors (email, URL, IP, phone - in that order), corrects token
 * boundaries for the collected matches, then annotates dictionary multi-word units and finally
 * dictionary types for every token.
 *
 * @param jcas the CAS to process
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {

	ArrayList<DictTerm> patternMatches = new ArrayList<DictTerm>();

	if (extractEmail) {
		patternMatches.addAll(annotateRegex(jcas, REGEX_EMAIL, "EMAIL"));
	}
	if (extractUrl) {
		patternMatches.addAll(annotateRegex(jcas, REGEX_URL, "URL"));
	}
	if (extractIp) {
		patternMatches.addAll(annotateRegex(jcas, REGEX_IP, "IP"));
	}
	if (extractPhone) {
		patternMatches.addAll(annotateRegex(jcas, REGEX_PHONE, "PHONE"));
	}

	// Token and sentence boundaries are adjusted to the pattern matches
	correctTokenBoundaries(jcas, patternMatches);

	// Dictionary lookup: multi-word units first, then one token at a time
	annotateMultiWordUnits(jcas);

	Collection<Token> allTokens = JCasUtil.select(jcas, Token.class);
	for (Token token : allTokens) {
		annotateDictTypes(jcas, token);
	}
}
 
Example 12
Source Project: ambiverse-nlu   Source File: WordShape.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a WordShape for the given JCas, sets its begin and end offsets and then calls
 * {@code readObject()}.
 *
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordShape(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}
 
Example 13
Source Project: inception   Source File: StringMatchingRecommenderTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a single-element list holding a CAS for the given text (language "de") with
 * sentences, tokens and named entities created from the given offset pairs.
 *
 * @param aText the document text
 * @param aVals named-entity values; one entity is created per value
 * @param aNEIndices begin/end pair for each named entity, parallel to {@code aVals}
 * @param aSentIndices begin/end pair for each sentence
 * @param aTokenIndices begin/end pair for each token
 * @return a mutable list containing the CAS
 */
private List<CAS> getTestNECas(String aText, String[] aVals, int[][] aNEIndices,
        int[][] aSentIndices, int[][] aTokenIndices)
    throws Exception
{
    JCas jcas = JCasFactory.createText(aText, "de");

    for (int[] span : aSentIndices) {
        new Sentence(jcas, span[0], span[1]).addToIndexes();
    }

    for (int[] span : aTokenIndices) {
        new Token(jcas, span[0], span[1]).addToIndexes();
    }

    // Entities are driven by the values array; offsets are looked up at the same position
    for (int n = 0; n < aVals.length; n++) {
        int[] span = aNEIndices[n];
        NamedEntity entity = new NamedEntity(jcas, span[0], span[1]);
        entity.setValue(aVals[n]);
        entity.addToIndexes();
    }

    List<CAS> result = new ArrayList<>();
    result.add(jcas.getCas());
    return result;
}
 
Example 14
/**
 * Main method to extract entity-based feature-value pairs.
 *
 * <p>A feature set is created for the given extractor and entity mentions. Every feature of
 * that set is applied to the CAS and its entries are merged into one map; when two features
 * produce the same key, the entry of the later feature replaces the earlier one.
 *
 * @param jCas the CAS handed to each feature
 * @param entityMentions the entity mentions the feature set is built from
 * @param featureExtractor the extractor setting the feature set is built for
 * @return the merged feature-value pairs
 * @throws Exception propagated from feature set creation or feature extraction
 */
public  Map<Integer, Double> getEntityFeatureValues(
    JCas jCas, Collection<AidaEntity> entityMentions, TrainingSettings.FeatureExtractor featureExtractor) throws Exception {

  FeatureSet featureSet = FeatureSetFactory.createFeatureSet(featureExtractor, entityMentions);

  Map<Integer, Double> merged = new HashMap<>();
  for (Feature feature : featureSet.features()) {
    Map<? extends Integer, ? extends Double> extracted = feature.extract(jCas);
    merged.putAll(extracted);
  }
  return merged;
}
 
Example 15
Source Project: bluima   Source File: Event.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs an Event for the given JCas, sets its begin and end offsets and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Event(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}
 
Example 16
/**
 * Checks sentence splitting on line breaks: each input text is tokenized and the covered text
 * of the resulting sentences is compared, in order, with the expected strings.
 */
@Test
public void test() throws Exception {
    String[] inputs = {
            "aa",
            "aa aa\nbb bb",
            "aa aa\nbb bb\ncc cc\n",
            // leading \n is kept: it yields an empty first sentence
            "\naa" };
    String[][] expectedSentences = {
            { "aa" },
            { "aa aa", "bb bb" },
            { "aa aa", "bb bb", "cc cc" },
            { "", "aa" } };

    for (int k = 0; k < inputs.length; k++) {
        JCas jcas = getTokenizedTestCas(inputs[k]);
        ArrayList<Sentence> sentences = newArrayList(select(jcas, Sentence.class));

        assertEquals(expectedSentences[k].length, sentences.size());
        for (int s = 0; s < expectedSentences[k].length; s++) {
            assertEquals(expectedSentences[k][s], sentences.get(s).getCoveredText());
        }
    }
}
 
Example 17
Source Project: bluima   Source File: MultipleProteins.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a MultipleProteins for the given JCas, sets its begin and end offsets and then
 * calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public MultipleProteins(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}
 
Example 18
Source Project: webanno   Source File: WebAnnoTsv3WriterTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a document in which a link host is created on the first token with a null link array,
 * while SimpleSpan annotations exist on the second and third token, and compares the output
 * with the expected reference.
 */
@Test
public void testUnsetSlotFeature() throws Exception
{
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokens = new ArrayList<>(select(jcas, Token.class));
    Token hostToken = tokens.get(0);
    Token secondToken = tokens.get(1);
    Token thirdToken = tokens.get(2);

    // One SimpleSpan over the second and one over the third token
    Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");
    for (Token covered : new Token[] { secondToken, thirdToken }) {
        AnnotationFS span = cas.createAnnotation(spanType, covered.getBegin(), covered.getEnd());
        cas.addFsToIndexes(span);
    }

    // Link host on the first token; the links argument is deliberately null
    makeLinkHostFS(jcas, "webanno.custom.FlexLinkHost", hostToken.getBegin(),
            hostToken.getEnd(), (FeatureStructure[]) null);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.FlexLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
            asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.FlexLinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"));
}
 
Example 19
Source Project: baleen   Source File: TemplateRecordDefinitionAnnotator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a record definition annotation and adds it to the JCas indexes.
 *
 * <p>Three matcher groups are read: group 1 becomes the record name, group 2 is appended
 * directly after the name when building the attribute element, and group 3 supplies the begin
 * and end offsets of the annotation. The confidence is set to 1.0.
 *
 * @param jCas the JCas
 * @param matcher the matcher that triggered the creation; it must provide groups 1 to 3
 */
private void createRecordDefinitionAnnotation(JCas jCas, Matcher matcher) {
  TemplateRecordDefinition definition = new TemplateRecordDefinition(jCas);

  String recordName = matcher.group(1);
  definition.setName(recordName);
  definition.setBegin(matcher.start(3));
  definition.setEnd(matcher.end(3));
  definition.setConfidence(1.0);

  String attributeElement = "<record:" + recordName + matcher.group(2) + " />";
  addAttributes(definition, attributeElement);

  addToJCasIndex(definition);
}
 
Example 20
Source Project: webanno   Source File: WebannoTsv1Reader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Add sentence layer to CAS.
 *
 * <p>Creates one {@code Sentence} per entry of {@code firstTokenInSentence}. All offsets are
 * taken from tokens looked up in {@code tokensStored} under the key {@code "t_" + number}.
 *
 * @param aJCas the JCas the sentences are created in
 * @param firstTokenInSentence token numbers used to look up sentence boundaries
 *            (NOTE(review): presumably the number of the first token of each sentence -
 *            confirm against the caller)
 * @param tokensStored tokens keyed by {@code "t_" + number}; the token keyed by the map's size
 *            supplies the end of the final sentence
 */
private void createSentence(JCas aJCas, List<Integer> firstTokenInSentence,
        Map<String, Token> tokensStored)
{
    for (int i = 0; i < firstTokenInSentence.size(); i++) {
        Sentence outSentence = new Sentence(aJCas);
        // Only last sentence, and not the only sentence in the document (i!=0):
        // begins at the END of the token listed for entry i and runs to the end of the token
        // keyed by the map size, then the loop is left.
        // NOTE(review): the other branches start at getBegin() or getEnd() + 1 - confirm that
        // plain getEnd() is intended here.
        if (i == firstTokenInSentence.size() - 1 && i != 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
            break;
        }
        // The only sentence in the document: from the begin of its listed token to the end of
        // the token keyed by the map size.
        if (i == firstTokenInSentence.size() - 1 && i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd());
            outSentence.addToIndexes();
        }
        // First of several sentences: from the begin of its listed token to the end of the
        // token listed for the next entry.
        else if (i == 0) {
            outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i))
                    .getBegin());
            outSentence.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1))
                    .getEnd());
            outSentence.addToIndexes();
        }
        // Any sentence in between: starts one position after the end of the token listed for
        // entry i and runs to the end of the token listed for the next entry.
        else {
            outSentence.setBegin(
                    tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd() + 1);
            outSentence
                    .setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)).getEnd());
            outSentence.addToIndexes();
        }
    }
}
 
Example 21
Source Project: bluima   Source File: PHYS_Inverse.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a PHYS_Inverse for the given JCas, sets its begin and end offsets and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public PHYS_Inverse(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}
 
Example 22
Source Project: baleen   Source File: DocumentRelationshipAnnotator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates one relation for every (source, target) pair drawn from the two entity collections.
 *
 * <p>Each relation uses the begin and end of the given offset, the configured type, sub type
 * and confidence, and an empty string as its sixth creation argument. The sentence distance is
 * set to {@code distance}; word distance and dependency distance are set to -1.
 *
 * @param jCas the JCas the relations are created in
 * @param sentence1entities entities used as relation sources
 * @param sentence2entities entities used as relation targets
 * @param offset supplies begin and end for every relation
 * @param distance the sentence distance to record on every relation
 * @return the created relations, sources in outer order and targets in inner order
 */
protected List<Relation> createSentenceRelation(
    final JCas jCas,
    final Collection<Entity> sentence1entities,
    final Collection<Entity> sentence2entities,
    final Offset offset,
    final int distance) {

  final List<Relation> created = new LinkedList<>();

  for (Entity from : sentence1entities) {
    for (Entity to : sentence2entities) {
      Relation link = createRelation(
          jCas, from, to, offset.getBegin(), offset.getEnd(), type, subType, "", confidence);

      link.setSentenceDistance(distance);
      // Word and dependency distance are recorded as -1 for these relations
      link.setWordDistance(-1);
      link.setDependencyDistance(-1);

      created.add(link);
    }
  }

  return created;
}
 
Example 23
Source Project: webanno   Source File: WebAnnoTsv3WriterTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a document holding one {@code Span} that covers the whole document text and has no
 * feature value set, and compares the output with the expected reference.
 */
@Test
public void testMultiTokenSpanWithoutFeatureValue() throws Exception
{
    JCas jcas = makeJCasOneSentence();

    // Span from offset 0 to the end of the document text
    int documentLength = jcas.getDocumentText().length();
    new Span(jcas, 0, documentLength).addToIndexes();

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
 
Example 24
Source Project: termsuite-core   Source File: RegexSpotter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stops the stopwatch, adds its elapsed milliseconds to the cumulated total, logs both values
 * together with the source document URI, and flushes the occurrence buffer.
 *
 * @param jCas the CAS that has just been processed
 */
@Override
protected void afterRuleProcessing(JCas jCas) {
	this.sw.stop();

	// Read once after stopping; the same value feeds the total and the log line
	long elapsedMillis = this.sw.elapsed(TimeUnit.MILLISECONDS);
	totalTimeInMillis.addAndGet(elapsedMillis);

	LOGGER.debug("Processed MWT spotting on doc {} in {}ms [Cumulated: {}ms]",
			JCasUtils.getSourceDocumentAnnotation(jCas).get().getUri(),
			elapsedMillis,
			totalTimeInMillis.get());

	flushOccurrenceBuffer(jCas);
}
 
Example 25
Source Project: ambiverse-nlu   Source File: UiucWikiLocationsRedirects.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a UiucWikiLocationsRedirects for the given JCas, sets its begin and end offsets
 * and then calls {@code readObject()}.
 *
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public UiucWikiLocationsRedirects(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}
 
Example 26
Source Project: uima-uimaj   Source File: Sentence.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a Sentence for the given JCas, sets its begin and end offsets and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Sentence(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}
 
Example 27
Source Project: bluima   Source File: Size.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a Size for the given JCas, sets its begin and end offsets and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Size(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}
 
Example 28
Source Project: uima-uimaj   Source File: IndexRepositoryTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adding an annotation whose sofa feature has been set to null must fail with a
 * {@code CASRuntimeException} carrying the message key "SOFAREF_NOT_SET".
 */
public void testMissingSofaRef() throws Exception {
  JCas jcas = cas.getJCas();
  Annotation annotation = new Annotation(jcas, 0, 4);

  // Overwrite the sofa feature of the fresh annotation with null
  FeatureImpl sofaFeature = (FeatureImpl) cas.getTypeSystem()
      .getType(CAS.TYPE_NAME_ANNOTATION_BASE)
      .getFeatureByBaseName(CAS.FEATURE_BASE_NAME_SOFA);
  annotation._setFeatureValueNcNj(sofaFeature, null);

  try {
    jcas.addFsToIndexes(annotation);
    // Reaching this line means indexing was accepted, which is the failure case
    fail("required exception not thrown");
  } catch (CASRuntimeException expected) {
    assertEquals("SOFAREF_NOT_SET", expected.getMessageKey());
  }
}
 
Example 29
Source Project: baleen   Source File: AbstractMetadataRegexAnnotator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code Metadata} annotation whose key is the configured key and whose value is the
 * configured matcher group passed through {@code convertValue}.
 *
 * @param jCas the JCas the annotation is created in
 * @param matcher the matcher positioned on the current match
 * @return the populated annotation (not added to the indexes here)
 */
@Override
protected Metadata create(JCas jCas, Matcher matcher) {
  Metadata metadata = new Metadata(jCas);
  metadata.setKey(key);
  metadata.setValue(convertValue(matcher.group(valueGroup)));
  return metadata;
}
 
Example 30
Source Project: bluima   Source File: RoomNumber.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Constructs a RoomNumber for the given JCas, sets its begin and end offsets and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public RoomNumber(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet - presumably a
  // post-construction hook; confirm before relying on it.
  readObject();
}