Java Code Examples for org.apache.uima.jcas.JCas
The following examples show how to use
org.apache.uima.jcas.JCas.
These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: bluima Author: BlueBrain File: WhiteTextCollectionReader.java License: Apache License 2.0 | 6 votes |
public void getNext(JCas jcas) throws IOException, CollectionException { Element articleE = articleIt.next(); String pmid = articleE.getChildText("PMID"); LOG.trace("processing pmId {}", pmid); currentNrDocs++; StringBuilder sb = new StringBuilder(); int i = addAnnotations(jcas, articleE.getChild("ArticleTitle") .getContent(), sb, 0); sb.append(" ");// add "space" i++; addAnnotations(jcas, articleE.getChild("AbstractText").getContent(), sb, i); jcas.setDocumentText(sb.toString()); Header h = new Header(jcas); h.setDocId(pmid); h.addToIndexes(); }
Example #2
Source Project: uima-uimafit Author: apache File: ExternalResourceFactoryTest.java License: Apache License 2.0 | 6 votes |
/**
 * Checks the injected {@code resourceArray}: it must be non-null, hold exactly two
 * distinct {@code ResourceWithAssert} instances, and each instance must pass its own
 * {@code doAsserts()}. Both elements are also recorded in {@code resources} and their
 * hash codes are printed to standard output.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    assertNotNull("Resource array is null", resourceArray);
    assertEquals(2, resourceArray.length);

    final ResourceWithAssert first = resourceArray[0];
    final ResourceWithAssert second = resourceArray[1];

    assertTrue("Resource array element 0 is not a DummyResource",
            first instanceof ResourceWithAssert);
    assertTrue("Resource array element 1 is not a DummyResource",
            second instanceof ResourceWithAssert);
    // the two slots must be bound to different objects
    assertTrue(first != second);

    resources.add(first);
    resources.add(second);

    System.out.printf("Element object 0: %d%n", first.hashCode());
    System.out.printf("Element object 1: %d%n", second.hashCode());

    for (ResourceWithAssert resource : resourceArray) {
        resource.doAsserts();
    }
}
Example #3
Source Project: bluima Author: BlueBrain File: FiltersTest.java License: Apache License 2.0 | 6 votes |
@Test public void testNormalizeMeasures() throws Exception { JCas jCas = getTestCas("ab12 mm"); Measure m = createAnnot(jCas, Measure.class, 2, 6); m.setUnit("mm"); m.setValue(12f); createAnnot(jCas, Keep.class, 2, 6).setEnclosedAnnot(m); // -> NOT filtered run(jCas, "tokenFrequencyFile", "stopwords_empty", 0, 5000); for (Keep select : select(jCas, Keep.class)) { Prin.t(select); } assertTrue(exists(jCas, Keep.class)); assertEquals(MeasureNormalizerAnnotator.MEASURE_MASK + "mm", select(jCas, Keep.class).iterator().next().getNormalizedText()); }
Example #4
Source Project: uima-uimaj Author: apache File: BinaryCasSerDes4.java License: Apache License 2.0 | 6 votes |
/**
 * Serializes a CAS through a freshly created {@code Serializer}.
 *
 * @param cas CAS to serialize; a {@link JCas} is unwrapped to its underlying CAS first
 * @param out output object, converted with {@code makeDataOutputStream}
 * @param trackingMark tracking mark (for delta serialization); may be {@code null}
 * @param compressLevel passed through to the serializer
 * @param compressStrategy passed through to the serializer
 * @return {@code null} or serialization measurements (depending on setting of doMeasurements)
 * @throws IOException declared for the underlying serialization
 * @throws CASRuntimeException with {@code INVALID_MARKER} if a non-null marker is not valid
 */
public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark,
        CompressLevel compressLevel, CompressStrat compressStrategy) throws IOException {
    SerializationMeasures measures = null;
    if (doMeasurements) {
        measures = new SerializationMeasures();
    }

    final AbstractCas underlying;
    if (cas instanceof JCas) {
        underlying = ((JCas) cas).getCas();
    } else {
        underlying = cas;
    }
    final CASImpl casImpl = (CASImpl) underlying;

    final boolean markerUnusable = (trackingMark != null) && !trackingMark.isValid();
    if (markerUnusable) {
        throw new CASRuntimeException(CASRuntimeException.INVALID_MARKER, "Invalid Marker.");
    }

    final Serializer serializer = new Serializer(
            casImpl,
            makeDataOutputStream(out),
            (MarkerImpl) trackingMark,
            measures,
            compressLevel,
            compressStrategy,
            false);
    serializer.serialize();
    return measures;
}
Example #5
Source Project: webanno Author: webanno File: WebannoTsv3Reader.java License: Apache License 2.0 | 6 votes |
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd) { // If the next sentence immediately follows the last one without any space or line break // in between, then we need to chop off again the linebreak that we added at the end of the // last sentence - otherwise offsets will be off on a round-trip. if (aPrevEnd == aBegin && coveredText.length() > 0 && (coveredText.charAt(coveredText.length() - 1) == '\n')) { coveredText.deleteCharAt(coveredText.length() - 1); } if (aPrevEnd + 1 < aBegin) { // FIXME This is very slow. Better use StringUtils.repeat() StringBuilder pad = new StringBuilder(); // if there is plenty of spaces between // sentences for (int i = aPrevEnd + 1; i < aBegin; i++) { pad.append(" "); } coveredText.append(pad).append(aLine).append(LF); } else { coveredText.append(aLine).append(LF); } Sentence sentence = new Sentence(aJCas, aBegin, aEnd); sentence.addToIndexes(); }
Example #6
Source Project: bluima Author: BlueBrain File: MongoTest.java License: Apache License 2.0 | 6 votes |
/**
 * Runs three {@code $gt}/{@code $lt} range queries on {@code pmid} through the
 * Mongo collection reader and checks how many documents each one yields.
 */
@Test
public void testReadWithBetweenQuery() throws Exception {
    // each query is paired (by position) with the number of documents expected back
    final String[] queries = {
        "{pmid: { $gt: 12, $lt: 19 }}",
        "{pmid: { $gt: 18, $lt: 19 }}",
        "{pmid: { $gt: 8, $lt: 11 }}"
    };
    final int[] expectedSizes = { 1, 0, 0 };

    for (int k = 0; k < queries.length; k++) {
        List<JCas> docs = asList(createReader(MongoCollectionReader.class,
                BlueUima.PARAM_DB_CONNECTION, conn,
                BlueUima.PARAM_QUERY, queries[k]));
        assertEquals(expectedSizes[k], docs.size());
    }
}
Example #7
Source Project: bluima Author: BlueBrain File: SkipSomePosAnnotator2.java License: Apache License 2.0 | 6 votes |
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { for (Token t : select(jCas, Token.class)) { // filter by POS String pos = t.getPos(); if (pos == null) pos = "UNKNOWN"; if (SKIP_POS.contains(pos)) { for (Annotation a : selectCovered(jCas, Annotation.class, t.getBegin(), t.getEnd())) { if (BlueCasUtil.haveSameBeginEnd(t, a) && BIO_ANNOTATIONS.contains(a.getClass())) a.removeFromIndexes(jCas); } } } }
Example #8
Source Project: baleen Author: dstl File: MetaTags.java License: Apache License 2.0 | 6 votes |
/**
 * Turns a {@code meta} element into a {@link Metadata} annotation. The key is the
 * {@code name} attribute; the value is the {@code content} attribute when non-empty,
 * otherwise the {@code charset} attribute when non-empty, otherwise left unset.
 * Elements with any other tag name are ignored.
 */
@Override
public void map(JCas jCas, Element element, AnnotationCollector collector) {
    if (!"meta".equalsIgnoreCase(element.tagName())) {
        return;
    }

    final Metadata metadata = new Metadata(jCas);
    metadata.setKey(element.attr("name"));

    final String content = element.attr("content");
    final String charset = element.attr("charset");
    if (!Strings.isNullOrEmpty(content)) {
        metadata.setValue(content);
    } else if (!Strings.isNullOrEmpty(charset)) {
        metadata.setValue(charset);
    }

    collector.add(metadata);
}
Example #9
Source Project: bluima Author: BlueBrain File: CooccurrencesEvaluationAnnotator.java License: Apache License 2.0 | 6 votes |
/**
 * Compares the {@code Cooccurrence} annotations of the gold view with those of the
 * system view, feeds both collections to {@code evaluator} and prints its result.
 *
 * @throws AnalysisEngineProcessException wrapping the {@link CASException} raised
 *         when one of the views cannot be obtained
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    final String pmId = BlueCasUtil.getHeaderDocId(jCas);
    print("pmId " + pmId);

    final JCas goldView;
    final JCas systemView;
    try {
        goldView = jCas.getView(VIEW_GOLD);
        systemView = jCas.getView(VIEW_SYSTEM);
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }

    final Collection goldAnnot = select(goldView, Cooccurrence.class);
    final Collection systAnnot = select(systemView, Cooccurrence.class);

    final int goldCount = goldAnnot.size();
    final int systemCount = systAnnot.size();
    print("comparing #gold:" + goldCount + " #sys:" + systemCount);
    print(evaluator.add(goldAnnot, systAnnot, pmId));
}
Example #10
Source Project: bluima Author: BlueBrain File: AnnotationFilterAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Removes from the indexes every {@code Keep} annotation whose enclosed annotation
 * has a class contained in {@code annotationClassesList}.
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    final Collection<Keep> keeps = select(jCas, Keep.class);
    // work on an array snapshot so that removals do not affect the iteration
    final Keep[] snapshot = keeps.toArray(new Keep[keeps.size()]);

    for (Keep keep : snapshot) {
        final Class<? extends Annotation> enclosedType = keep.getEnclosedAnnot().getClass();
        if (annotationClassesList.contains(enclosedType)) {
            keep.removeFromIndexes();
        }
    }
}
Example #11
Source Project: newsleak Author: uhh-lt File: DictionaryExtractor.java License: GNU Affero General Public License v3.0 | 5 votes |
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { ArrayList<DictTerm> termsToTokenList = new ArrayList<DictTerm>(); // EMAIL if (extractEmail) termsToTokenList.addAll(annotateRegex(jcas, REGEX_EMAIL, "EMAIL")); // URL if (extractUrl) termsToTokenList.addAll(annotateRegex(jcas, REGEX_URL, "URL")); // IP if (extractIp) termsToTokenList.addAll(annotateRegex(jcas, REGEX_IP, "IP")); // PHONE if (extractPhone) termsToTokenList.addAll(annotateRegex(jcas, REGEX_PHONE, "PHONE")); // Set new token and sentence boundaries for pattern matches correctTokenBoundaries(jcas, termsToTokenList); // Dictionary multi word units annotateMultiWordUnits(jcas); // Dictionary unigrams Collection<Token> tokens = JCasUtil.select(jcas, Token.class); for (Token t : tokens) { annotateDictTypes(jcas, t); } }
Example #12
Source Project: ambiverse-nlu Author: ambiverse-nlu File: WordShape.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordShape(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}
Example #13
Source Project: inception Author: inception-project File: StringMatchingRecommenderTest.java License: Apache License 2.0 | 5 votes |
/**
 * Builds a German ("de") test CAS over {@code aText} containing the given sentences,
 * tokens and named entities, and returns it wrapped in a single-element list.
 *
 * @param aText document text
 * @param aVals named-entity values; one entity is created per entry
 * @param aNEIndices {@code [begin, end]} pair for each named entity, parallel to {@code aVals}
 * @param aSentIndices {@code [begin, end]} pair for each sentence
 * @param aTokenIndices {@code [begin, end]} pair for each token
 */
private List<CAS> getTestNECas(String aText, String[] aVals, int[][] aNEIndices,
        int[][] aSentIndices, int[][] aTokenIndices) throws Exception {
    final JCas jcas = JCasFactory.createText(aText, "de");

    for (int[] span : aSentIndices) {
        new Sentence(jcas, span[0], span[1]).addToIndexes();
    }

    for (int[] span : aTokenIndices) {
        new Token(jcas, span[0], span[1]).addToIndexes();
    }

    // the entity loop is driven by aVals; aNEIndices is read at the same position
    for (int i = 0; i < aVals.length; i++) {
        final int[] span = aNEIndices[i];
        final NamedEntity entity = new NamedEntity(jcas, span[0], span[1]);
        entity.setValue(aVals[i]);
        entity.addToIndexes();
    }

    final List<CAS> result = new ArrayList<>();
    result.add(jcas.getCas());
    return result;
}
Example #14
Source Project: ambiverse-nlu Author: ambiverse-nlu File: NYTEntitySalienceFeatureExtractor.java License: Apache License 2.0 | 5 votes |
/**
 * Main method to extract entity-based feature-value pairs.
 *
 * <p>A feature set is created from {@code featureExtractor} and {@code entityMentions};
 * the map extracted by each of its features is merged into one result map, later
 * features overwriting earlier entries with the same key.
 *
 * @param jCas the CAS each feature extracts from
 * @param entityMentions the entity mentions the feature set is built for
 * @param featureExtractor selects which feature set is created
 * @return the merged feature-value map
 * @throws Exception declared by the method; raised by the calls it makes
 */
public Map<Integer, Double> getEntityFeatureValues(
        JCas jCas,
        Collection<AidaEntity> entityMentions,
        TrainingSettings.FeatureExtractor featureExtractor) throws Exception {
    final FeatureSet featureSet =
            FeatureSetFactory.createFeatureSet(featureExtractor, entityMentions);

    final Map<Integer, Double> merged = new HashMap<>();
    for (Feature feature : featureSet.features()) {
        final Map<Integer, Double> extracted = feature.extract(jCas);
        merged.putAll(extracted);
    }
    return merged;
}
Example #15
Source Project: bluima Author: BlueBrain File: Event.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Event(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}
Example #16
Source Project: bluima Author: BlueBrain File: NewlineSentenceSplitterAnnotatorTest.java License: Apache License 2.0 | 5 votes |
@Test public void test() throws Exception { JCas jcas = getTokenizedTestCas("aa"); ArrayList<Sentence> sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(1, sentences.size()); assertEquals("aa", sentences.get(0).getCoveredText()); jcas = getTokenizedTestCas("aa aa\nbb bb"); sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(2, sentences.size()); assertEquals("aa aa", sentences.get(0).getCoveredText()); assertEquals("bb bb", sentences.get(1).getCoveredText()); jcas = getTokenizedTestCas("aa aa\nbb bb\ncc cc\n"); sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(3, sentences.size()); assertEquals("aa aa", sentences.get(0).getCoveredText()); assertEquals("bb bb", sentences.get(1).getCoveredText()); assertEquals("cc cc", sentences.get(2).getCoveredText()); // leading \n is kept jcas = getTokenizedTestCas("\naa"); sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(2, sentences.size()); assertEquals("", sentences.get(0).getCoveredText()); assertEquals("aa", sentences.get(1).getCoveredText()); }
Example #17
Source Project: bluima Author: BlueBrain File: MultipleProteins.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public MultipleProteins(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}
Example #18
Source Project: webanno Author: webanno File: WebAnnoTsv3WriterTestBase.java License: Apache License 2.0 | 5 votes |
/**
 * Writes a document in which a link host over the first token is created with a
 * {@code null} feature-structure array, while simple spans cover the second and
 * third tokens, and compares the written output with the expected reference.
 */
@Test
public void testUnsetSlotFeature() throws Exception {
    final JCas jcas = makeJCasOneSentence();
    final CAS cas = jcas.getCas();

    final List<Token> tokens = new ArrayList<>(select(jcas, Token.class));
    final Token first = tokens.get(0);
    final Token second = tokens.get(1);
    final Token third = tokens.get(2);

    final Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    final AnnotationFS secondSpan =
            cas.createAnnotation(spanType, second.getBegin(), second.getEnd());
    cas.addFsToIndexes(secondSpan);

    final AnnotationFS thirdSpan =
            cas.createAnnotation(spanType, third.getBegin(), third.getEnd());
    cas.addFsToIndexes(thirdSpan);

    // the link host is created with no links at all (null array)
    makeLinkHostFS(jcas, "webanno.custom.FlexLinkHost", first.getBegin(), first.getEnd(),
            (FeatureStructure[]) null);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS,
            asList("webanno.custom.FlexLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
            asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES,
            asList("webanno.custom.FlexLinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS,
            asList("webanno.custom.SimpleSpan"));
}
Example #19
Source Project: baleen Author: dstl File: TemplateRecordDefinitionAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a record definition annotation and adds it to the JCas indexes.
 *
 * <p>The annotation is named after group 1, spans the extent of group 3, and gets a
 * confidence of 1.0. Its attributes are derived from a synthetic
 * {@code <record:NAME... />} tag assembled from groups 1 and 2.
 *
 * @param jCas the JCas
 * @param matcher the matcher that triggered the creation; groups 1, 2 and 3 are read,
 *     so the pattern must define at least three groups
 */
private void createRecordDefinitionAnnotation(JCas jCas, Matcher matcher) {
  final String recordName = matcher.group(1);

  final TemplateRecordDefinition definition = new TemplateRecordDefinition(jCas);
  definition.setName(recordName);
  definition.setBegin(matcher.start(3));
  definition.setEnd(matcher.end(3));
  definition.setConfidence(1.0);

  final String syntheticTag = "<record:" + recordName + matcher.group(2) + " />";
  addAttributes(definition, syntheticTag);
  addToJCasIndex(definition);
}
Example #20
Source Project: webanno Author: webanno File: WebannoTsv1Reader.java License: Apache License 2.0 | 5 votes |
/** * Add sentence layer to CAS */ private void createSentence(JCas aJCas, List<Integer> firstTokenInSentence, Map<String, Token> tokensStored) { for (int i = 0; i < firstTokenInSentence.size(); i++) { Sentence outSentence = new Sentence(aJCas); // Only last sentence, and no the only sentence in the document (i!=0) if (i == firstTokenInSentence.size() - 1 && i != 0) { outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd()); outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd()); outSentence.addToIndexes(); break; } if (i == firstTokenInSentence.size() - 1 && i == 0) { outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)) .getBegin()); outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd()); outSentence.addToIndexes(); } else if (i == 0) { outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)) .getBegin()); outSentence.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)) .getEnd()); outSentence.addToIndexes(); } else { outSentence.setBegin( tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd() + 1); outSentence .setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)).getEnd()); outSentence.addToIndexes(); } } }
Example #21
Source Project: bluima Author: BlueBrain File: PHYS_Inverse.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public PHYS_Inverse(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}
Example #22
Source Project: baleen Author: dstl File: DocumentRelationshipAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Creates one relation for every (source, target) pair drawn from the entities of two
 * sentences. Each relation spans {@code offset}, records {@code distance} as its
 * sentence distance, and has word and dependency distance set to -1.
 *
 * @param jCas the JCas the relations are created in
 * @param sentence1entities entities used as relation sources
 * @param sentence2entities entities used as relation targets
 * @param offset supplies begin and end of every created relation
 * @param distance value stored as the sentence distance
 * @return the created relations, sources in outer order and targets in inner order
 */
protected List<Relation> createSentenceRelation(
    final JCas jCas,
    final Collection<Entity> sentence1entities,
    final Collection<Entity> sentence2entities,
    final Offset offset,
    final int distance) {

  final List<Relation> created = new LinkedList<>();

  for (Entity from : sentence1entities) {
    for (Entity to : sentence2entities) {
      final Relation rel =
          createRelation(
              jCas, from, to, offset.getBegin(), offset.getEnd(), type, subType, "", confidence);
      rel.setSentenceDistance(distance);
      rel.setWordDistance(-1);
      rel.setDependencyDistance(-1);
      created.add(rel);
    }
  }

  return created;
}
Example #23
Source Project: webanno Author: webanno File: WebAnnoTsv3WriterTestBase.java License: Apache License 2.0 | 5 votes |
/**
 * Writes a document with one {@code Span} covering the whole document text and no
 * feature value set, and compares the output with the expected reference.
 */
@Test
public void testMultiTokenSpanWithoutFeatureValue() throws Exception {
    final JCas jcas = makeJCasOneSentence();

    final int documentLength = jcas.getDocumentText().length();
    final Span wholeDocument = new Span(jcas, 0, documentLength);
    wholeDocument.addToIndexes();

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
Example #24
Source Project: termsuite-core Author: termsuite File: RegexSpotter.java License: Apache License 2.0 | 5 votes |
/**
 * Stops the stopwatch, adds its elapsed milliseconds to the cumulated total, logs
 * the per-document and cumulated timings at debug level, then flushes the
 * occurrence buffer for {@code jCas}.
 */
@Override
protected void afterRuleProcessing(JCas jCas) {
    sw.stop();

    final long elapsedForTotal = sw.elapsed(TimeUnit.MILLISECONDS);
    totalTimeInMillis.addAndGet(elapsedForTotal);

    LOGGER.debug(
            "Processed MWT spotting on doc {} in {}ms [Cumulated: {}ms]",
            JCasUtils.getSourceDocumentAnnotation(jCas).get().getUri(),
            sw.elapsed(TimeUnit.MILLISECONDS),
            totalTimeInMillis.get());

    flushOccurrenceBuffer(jCas);
}
Example #25
Source Project: ambiverse-nlu Author: ambiverse-nlu File: UiucWikiLocationsRedirects.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public UiucWikiLocationsRedirects(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}
Example #26
Source Project: uima-uimaj Author: apache File: Sentence.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Sentence(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}
Example #27
Source Project: bluima Author: BlueBrain File: Size.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Size(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}
Example #28
Source Project: uima-uimaj Author: apache File: IndexRepositoryTest.java License: Apache License 2.0 | 5 votes |
public void testMissingSofaRef() throws Exception { JCas jcas = cas.getJCas(); Annotation a = new Annotation(jcas, 0, 4); FeatureImpl feat = (FeatureImpl) cas.getTypeSystem().getType(CAS.TYPE_NAME_ANNOTATION_BASE) .getFeatureByBaseName(CAS.FEATURE_BASE_NAME_SOFA); a._setFeatureValueNcNj(feat, null); try { jcas.addFsToIndexes(a); } catch (CASRuntimeException e) { assertEquals("SOFAREF_NOT_SET", e.getMessageKey()); return; } fail("required exception not thrown"); // fail }
Example #29
Source Project: baleen Author: dstl File: AbstractMetadataRegexAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Builds a {@link Metadata} annotation whose key is the configured {@code key} and
 * whose value is the converted text of the matcher group numbered {@code valueGroup}.
 *
 * @param jCas the JCas the annotation is created in
 * @param matcher the matcher positioned on the current match
 * @return the populated annotation (not added to any index by this method)
 */
@Override
protected Metadata create(JCas jCas, Matcher matcher) {
  final Metadata metadata = new Metadata(jCas);
  metadata.setKey(key);
  metadata.setValue(convertValue(matcher.group(valueGroup)));
  return metadata;
}
Example #30
Source Project: bluima Author: BlueBrain File: RoomNumber.java License: Apache License 2.0 | 5 votes |
/**
 * Constructs the instance through the superclass constructor with {@code jcas},
 * sets the begin and end offsets, then calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public RoomNumber(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // NOTE(review): readObject() is defined outside this snippet; presumably a
  // post-construction initialization hook - confirm.
  readObject();
}