Java Code Examples for org.apache.uima.jcas.JCas
The following examples show how to use
org.apache.uima.jcas.JCas. These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: bluima Source File: WhiteTextCollectionReader.java License: Apache License 2.0 | 6 votes |
public void getNext(JCas jcas) throws IOException, CollectionException { Element articleE = articleIt.next(); String pmid = articleE.getChildText("PMID"); LOG.trace("processing pmId {}", pmid); currentNrDocs++; StringBuilder sb = new StringBuilder(); int i = addAnnotations(jcas, articleE.getChild("ArticleTitle") .getContent(), sb, 0); sb.append(" ");// add "space" i++; addAnnotations(jcas, articleE.getChild("AbstractText").getContent(), sb, i); jcas.setDocumentText(sb.toString()); Header h = new Header(jcas); h.setDocId(pmid); h.addToIndexes(); }
Example 2
Source Project: uima-uimafit Source File: ExternalResourceFactoryTest.java License: Apache License 2.0 | 6 votes |
/**
 * Verifies that exactly two distinct {@code ResourceWithAssert} instances were provided in
 * {@code resourceArray}, records them in {@code resources}, prints their hash codes and runs
 * each resource's own assertions.
 *
 * @param aJCas the CAS being processed (not read by this method)
 * @throws AnalysisEngineProcessException declared by the method signature
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    assertNotNull("Resource array is null", resourceArray);
    assertEquals(2, resourceArray.length);

    assertTrue("Resource array element 0 is not a DummyResource",
            resourceArray[0] instanceof ResourceWithAssert);
    assertTrue("Resource array element 1 is not a DummyResource",
            resourceArray[1] instanceof ResourceWithAssert);

    // The two elements must be different objects.
    assertTrue(resourceArray[0] != resourceArray[1]);

    resources.add(resourceArray[0]);
    resources.add(resourceArray[1]);

    System.out.printf("Element object 0: %d%n", resourceArray[0].hashCode());
    System.out.printf("Element object 1: %d%n", resourceArray[1].hashCode());

    for (int i = 0; i < resourceArray.length; i++) {
        resourceArray[i].doAsserts();
    }
}
Example 3
Source Project: bluima Source File: FiltersTest.java License: Apache License 2.0 | 6 votes |
@Test public void testNormalizeMeasures() throws Exception { JCas jCas = getTestCas("ab12 mm"); Measure m = createAnnot(jCas, Measure.class, 2, 6); m.setUnit("mm"); m.setValue(12f); createAnnot(jCas, Keep.class, 2, 6).setEnclosedAnnot(m); // -> NOT filtered run(jCas, "tokenFrequencyFile", "stopwords_empty", 0, 5000); for (Keep select : select(jCas, Keep.class)) { Prin.t(select); } assertTrue(exists(jCas, Keep.class)); assertEquals(MeasureNormalizerAnnotator.MEASURE_MASK + "mm", select(jCas, Keep.class).iterator().next().getNormalizedText()); }
Example 4
Source Project: uima-uimaj Source File: BinaryCasSerDes4.java License: Apache License 2.0 | 6 votes |
/**
 * Serializes a CAS using the given compression settings.
 *
 * @param cas CAS to serialize; a {@link JCas} is unwrapped to its underlying CAS first
 * @param out output object
 * @param trackingMark tracking mark (for delta serialization), may be null
 * @param compressLevel passed through to the serializer
 * @param compressStrategy passed through to the serializer
 * @return null or serialization measurements (depending on setting of doMeasurements)
 * @throws IOException declared by the method signature
 * @throws CASRuntimeException if a non-null tracking mark is not valid
 */
public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark,
        CompressLevel compressLevel, CompressStrat compressStrategy) throws IOException {
    SerializationMeasures measures = null;
    if (doMeasurements) {
        measures = new SerializationMeasures();
    }

    AbstractCas unwrapped = (cas instanceof JCas) ? ((JCas) cas).getCas() : cas;
    CASImpl casImpl = (CASImpl) unwrapped;

    boolean markerSupplied = null != trackingMark;
    if (markerSupplied && !trackingMark.isValid()) {
        throw new CASRuntimeException(CASRuntimeException.INVALID_MARKER, "Invalid Marker.");
    }

    Serializer serializer = new Serializer(
            casImpl,
            makeDataOutputStream(out),
            (MarkerImpl) trackingMark,
            measures,
            compressLevel,
            compressStrategy,
            false);
    serializer.serialize();
    return measures;
}
Example 5
Source Project: webanno Source File: WebannoTsv3Reader.java License: Apache License 2.0 | 6 votes |
private void createSentence(JCas aJCas, String aLine, int aBegin, int aEnd, int aPrevEnd) { // If the next sentence immediately follows the last one without any space or line break // in between, then we need to chop off again the linebreak that we added at the end of the // last sentence - otherwise offsets will be off on a round-trip. if (aPrevEnd == aBegin && coveredText.length() > 0 && (coveredText.charAt(coveredText.length() - 1) == '\n')) { coveredText.deleteCharAt(coveredText.length() - 1); } if (aPrevEnd + 1 < aBegin) { // FIXME This is very slow. Better use StringUtils.repeat() StringBuilder pad = new StringBuilder(); // if there is plenty of spaces between // sentences for (int i = aPrevEnd + 1; i < aBegin; i++) { pad.append(" "); } coveredText.append(pad).append(aLine).append(LF); } else { coveredText.append(aLine).append(LF); } Sentence sentence = new Sentence(aJCas, aBegin, aEnd); sentence.addToIndexes(); }
Example 6
Source Project: bluima Source File: MongoTest.java License: Apache License 2.0 | 6 votes |
/**
 * Runs three range queries on {@code pmid} through the Mongo collection reader and checks the
 * number of documents each one returns.
 */
@Test
public void testReadWithBetweenQuery() throws Exception {
    String[] queries = {
        "{pmid: { $gt: 12, $lt: 19 }}",
        "{pmid: { $gt: 18, $lt: 19 }}",
        "{pmid: { $gt: 8, $lt: 11 }}"
    };
    int[] expectedSizes = { 1, 0, 0 };

    for (int i = 0; i < queries.length; i++) {
        List<JCas> docs = asList(createReader(MongoCollectionReader.class,
                BlueUima.PARAM_DB_CONNECTION, conn,
                BlueUima.PARAM_QUERY, queries[i]));
        assertEquals(expectedSizes[i], docs.size());
    }
}
Example 7
Source Project: bluima Source File: SkipSomePosAnnotator2.java License: Apache License 2.0 | 6 votes |
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { for (Token t : select(jCas, Token.class)) { // filter by POS String pos = t.getPos(); if (pos == null) pos = "UNKNOWN"; if (SKIP_POS.contains(pos)) { for (Annotation a : selectCovered(jCas, Annotation.class, t.getBegin(), t.getEnd())) { if (BlueCasUtil.haveSameBeginEnd(t, a) && BIO_ANNOTATIONS.contains(a.getClass())) a.removeFromIndexes(jCas); } } } }
Example 8
Source Project: baleen Source File: MetaTags.java License: Apache License 2.0 | 6 votes |
/**
 * Maps a {@code <meta>} element to a {@code Metadata} annotation keyed by the element's
 * {@code name} attribute. The value is the {@code content} attribute when non-empty, otherwise
 * the {@code charset} attribute when non-empty; otherwise no value is set. Elements with any
 * other tag name are ignored.
 *
 * @param jCas the CAS the annotation is created in
 * @param element the element to inspect
 * @param collector receives the created annotation
 */
@Override
public void map(JCas jCas, Element element, AnnotationCollector collector) {
    if (!"meta".equalsIgnoreCase(element.tagName())) {
        return;
    }

    Metadata metadata = new Metadata(jCas);
    metadata.setKey(element.attr("name"));

    String content = element.attr("content");
    String charset = element.attr("charset");
    if (!Strings.isNullOrEmpty(content)) {
        metadata.setValue(content);
    } else if (!Strings.isNullOrEmpty(charset)) {
        metadata.setValue(charset);
    }

    collector.add(metadata);
}
Example 9
Source Project: bluima Source File: CooccurrencesEvaluationAnnotator.java License: Apache License 2.0 | 6 votes |
/**
 * Compares the {@code Cooccurrence} annotations of the gold view with those of the system view
 * and feeds both collections to {@code evaluator}, printing progress along the way.
 *
 * @param jCas the CAS holding both views
 * @throws AnalysisEngineProcessException if one of the views cannot be retrieved
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    String pmId = BlueCasUtil.getHeaderDocId(jCas);
    print("pmId " + pmId);

    JCas goldView;
    JCas systemView;
    try {
        goldView = jCas.getView(VIEW_GOLD);
        systemView = jCas.getView(VIEW_SYSTEM);
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }

    Collection goldAnnot = select(goldView, Cooccurrence.class);
    Collection systAnnot = select(systemView, Cooccurrence.class);

    int goldCount = goldAnnot.size();
    int systemCount = systAnnot.size();
    print("comparing #gold:" + goldCount + " #sys:" + systemCount);

    print(evaluator.add(goldAnnot, systAnnot, pmId));
}
Example 10
Source Project: bluima Source File: AnnotationFilterAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Removes from the indexes every {@code Keep} annotation whose enclosed annotation's class is
 * contained in {@code annotationClassesList}.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the method signature
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Collection<Keep> keeps = select(jCas, Keep.class);

    // Iterate over an array snapshot, since annotations are removed inside the loop.
    for (Keep keep : keeps.toArray(new Keep[keeps.size()])) {
        Class<? extends Annotation> enclosedType = keep.getEnclosedAnnot().getClass();
        if (annotationClassesList.contains(enclosedType)) {
            keep.removeFromIndexes();
        }
    }
}
Example 11
Source Project: newsleak Source File: DictionaryExtractor.java License: GNU Affero General Public License v3.0 | 5 votes |
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { ArrayList<DictTerm> termsToTokenList = new ArrayList<DictTerm>(); // EMAIL if (extractEmail) termsToTokenList.addAll(annotateRegex(jcas, REGEX_EMAIL, "EMAIL")); // URL if (extractUrl) termsToTokenList.addAll(annotateRegex(jcas, REGEX_URL, "URL")); // IP if (extractIp) termsToTokenList.addAll(annotateRegex(jcas, REGEX_IP, "IP")); // PHONE if (extractPhone) termsToTokenList.addAll(annotateRegex(jcas, REGEX_PHONE, "PHONE")); // Set new token and sentence boundaries for pattern matches correctTokenBoundaries(jcas, termsToTokenList); // Dictionary multi word units annotateMultiWordUnits(jcas); // Dictionary unigrams Collection<Token> tokens = JCasUtil.select(jcas, Token.class); for (Token t : tokens) { annotateDictTypes(jcas, t); } }
Example 12
Source Project: ambiverse-nlu Source File: WordShape.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code WordShape} in the given JCas, sets its begin and end offsets, and then
 * calls {@code readObject()}.
 *
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordShape(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}
Example 13
Source Project: inception Source File: StringMatchingRecommenderTest.java License: Apache License 2.0 | 5 votes |
/**
 * Builds a German-language test CAS over {@code aText} containing the given sentences, tokens
 * and named entities, and returns it wrapped in a single-element list.
 *
 * @param aText the document text
 * @param aVals named-entity values; entry {@code i} belongs to {@code aNEIndices[i]}
 * @param aNEIndices begin/end pairs of the named entities
 * @param aSentIndices begin/end pairs of the sentences
 * @param aTokenIndices begin/end pairs of the tokens
 * @return a mutable list holding the created CAS
 * @throws Exception declared by the method signature
 */
private List<CAS> getTestNECas(String aText, String[] aVals, int[][] aNEIndices,
        int[][] aSentIndices, int[][] aTokenIndices) throws Exception {
    JCas jcas = JCasFactory.createText(aText, "de");

    for (int[] span : aSentIndices) {
        new Sentence(jcas, span[0], span[1]).addToIndexes();
    }

    for (int[] span : aTokenIndices) {
        new Token(jcas, span[0], span[1]).addToIndexes();
    }

    // Driven by aVals: each value is paired with the span at the same index.
    for (int i = 0; i < aVals.length; i++) {
        int[] span = aNEIndices[i];
        NamedEntity entity = new NamedEntity(jcas, span[0], span[1]);
        entity.setValue(aVals[i]);
        entity.addToIndexes();
    }

    List<CAS> result = new ArrayList<>();
    result.add(jcas.getCas());
    return result;
}
Example 14
Source Project: ambiverse-nlu Source File: NYTEntitySalienceFeatureExtractor.java License: Apache License 2.0 | 5 votes |
/**
 * Main method to extract entity-based feature-value pairs.
 *
 * @param jCas the CAS each feature is extracted from
 * @param entityMentions the entity mentions the feature set is created for
 * @param featureExtractor selects which feature set the factory creates
 * @return the merged output of all features in the set; when two features produce the same
 *         key, the one processed later wins
 * @throws Exception declared by the method signature
 */
public Map<Integer, Double> getEntityFeatureValues(
        JCas jCas,
        Collection<AidaEntity> entityMentions,
        TrainingSettings.FeatureExtractor featureExtractor) throws Exception {
    FeatureSet featureSet = FeatureSetFactory.createFeatureSet(featureExtractor, entityMentions);

    Map<Integer, Double> merged = new HashMap<>();
    for (Feature feature : featureSet.features()) {
        merged.putAll(feature.extract(jCas));
    }
    return merged;
}
Example 15
Source Project: bluima Source File: Event.java License: Apache License 2.0 | 5 votes |
/**
 * Creates an {@code Event} in the given JCas, sets its begin and end offsets, and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Event(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}
Example 16
Source Project: bluima Source File: NewlineSentenceSplitterAnnotatorTest.java License: Apache License 2.0 | 5 votes |
@Test public void test() throws Exception { JCas jcas = getTokenizedTestCas("aa"); ArrayList<Sentence> sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(1, sentences.size()); assertEquals("aa", sentences.get(0).getCoveredText()); jcas = getTokenizedTestCas("aa aa\nbb bb"); sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(2, sentences.size()); assertEquals("aa aa", sentences.get(0).getCoveredText()); assertEquals("bb bb", sentences.get(1).getCoveredText()); jcas = getTokenizedTestCas("aa aa\nbb bb\ncc cc\n"); sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(3, sentences.size()); assertEquals("aa aa", sentences.get(0).getCoveredText()); assertEquals("bb bb", sentences.get(1).getCoveredText()); assertEquals("cc cc", sentences.get(2).getCoveredText()); // leading \n is kept jcas = getTokenizedTestCas("\naa"); sentences = newArrayList(select(jcas, Sentence.class)); assertEquals(2, sentences.size()); assertEquals("", sentences.get(0).getCoveredText()); assertEquals("aa", sentences.get(1).getCoveredText()); }
Example 17
Source Project: bluima Source File: MultipleProteins.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code MultipleProteins} in the given JCas, sets its begin and end offsets, and
 * then calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public MultipleProteins(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}
Example 18
Source Project: webanno Source File: WebAnnoTsv3WriterTestBase.java License: Apache License 2.0 | 5 votes |
/**
 * Writes a CAS in which a link host over the first token has a null links array, with simple
 * spans over the second and third tokens, and compares the output with the reference.
 */
@Test
public void testUnsetSlotFeature() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> tokens = new ArrayList<>(select(jcas, Token.class));
    Token hostToken = tokens.get(0);
    Token secondToken = tokens.get(1);
    Token thirdToken = tokens.get(2);

    Type spanType = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");

    AnnotationFS secondSpan =
            cas.createAnnotation(spanType, secondToken.getBegin(), secondToken.getEnd());
    cas.addFsToIndexes(secondSpan);

    AnnotationFS thirdSpan =
            cas.createAnnotation(spanType, thirdToken.getBegin(), thirdToken.getEnd());
    cas.addFsToIndexes(thirdSpan);

    // The links argument is deliberately null: this is the "unset slot feature" case.
    makeLinkHostFS(jcas, "webanno.custom.FlexLinkHost", hostToken.getBegin(),
            hostToken.getEnd(), (FeatureStructure[]) null);

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS,
            asList("webanno.custom.FlexLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
            asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES,
            asList("webanno.custom.FlexLinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS,
            asList("webanno.custom.SimpleSpan"));
}
Example 19
Source Project: baleen Source File: TemplateRecordDefinitionAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a record definition annotation and adds it to the JCas indexes.
 *
 * @param jCas the JCas
 * @param matcher the matcher that triggered the creation. Group 1 is used as the record name,
 *     group 2 is appended directly after the name when building the attribute string, and
 *     group 3 supplies the begin and end offsets of the annotation.
 */
private void createRecordDefinitionAnnotation(JCas jCas, Matcher matcher) {
    String recordName = matcher.group(1);
    String attributeText = matcher.group(2);

    TemplateRecordDefinition definition = new TemplateRecordDefinition(jCas);
    definition.setName(recordName);
    definition.setBegin(matcher.start(3));
    definition.setEnd(matcher.end(3));
    definition.setConfidence(1.0);

    addAttributes(definition, "<record:" + recordName + attributeText + " />");
    addToJCasIndex(definition);
}
Example 20
Source Project: webanno Source File: WebannoTsv1Reader.java License: Apache License 2.0 | 5 votes |
/** * Add sentence layer to CAS */ private void createSentence(JCas aJCas, List<Integer> firstTokenInSentence, Map<String, Token> tokensStored) { for (int i = 0; i < firstTokenInSentence.size(); i++) { Sentence outSentence = new Sentence(aJCas); // Only last sentence, and no the only sentence in the document (i!=0) if (i == firstTokenInSentence.size() - 1 && i != 0) { outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd()); outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd()); outSentence.addToIndexes(); break; } if (i == firstTokenInSentence.size() - 1 && i == 0) { outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)) .getBegin()); outSentence.setEnd(tokensStored.get("t_" + (tokensStored.size())).getEnd()); outSentence.addToIndexes(); } else if (i == 0) { outSentence.setBegin(tokensStored.get("t_" + firstTokenInSentence.get(i)) .getBegin()); outSentence.setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)) .getEnd()); outSentence.addToIndexes(); } else { outSentence.setBegin( tokensStored.get("t_" + firstTokenInSentence.get(i)).getEnd() + 1); outSentence .setEnd(tokensStored.get("t_" + firstTokenInSentence.get(i + 1)).getEnd()); outSentence.addToIndexes(); } } }
Example 21
Source Project: bluima Source File: PHYS_Inverse.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code PHYS_Inverse} in the given JCas, sets its begin and end offsets, and then
 * calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public PHYS_Inverse(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}
Example 22
Source Project: baleen Source File: DocumentRelationshipAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Creates one relation for every (source, target) pair drawn from the two entity collections.
 * Each relation gets the given sentence distance; word and dependency distance are set to -1.
 *
 * @param jCas the CAS the relations are created in
 * @param sentence1entities source entities
 * @param sentence2entities target entities
 * @param offset supplies the begin and end passed to {@code createRelation}
 * @param distance the sentence distance to record on each relation
 * @return the created relations, sources in outer order and targets in inner order
 */
protected List<Relation> createSentenceRelation(
        final JCas jCas,
        final Collection<Entity> sentence1entities,
        final Collection<Entity> sentence2entities,
        final Offset offset,
        final int distance) {
    final List<Relation> created = new LinkedList<>();

    for (Entity from : sentence1entities) {
        for (Entity to : sentence2entities) {
            Relation rel = createRelation(
                    jCas, from, to, offset.getBegin(), offset.getEnd(),
                    type, subType, "", confidence);
            rel.setSentenceDistance(distance);
            rel.setWordDistance(-1);
            rel.setDependencyDistance(-1);
            created.add(rel);
        }
    }

    return created;
}
Example 23
Source Project: webanno Source File: WebAnnoTsv3WriterTestBase.java License: Apache License 2.0 | 5 votes |
/**
 * Writes a CAS containing one {@code Span} that covers the whole document text and has no
 * feature value set, and compares the output with the reference.
 */
@Test
public void testMultiTokenSpanWithoutFeatureValue() throws Exception {
    JCas jcas = makeJCasOneSentence();

    int documentLength = jcas.getDocumentText().length();
    Span wholeDocument = new Span(jcas, 0, documentLength);
    wholeDocument.addToIndexes();

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
Example 24
Source Project: termsuite-core Source File: RegexSpotter.java License: Apache License 2.0 | 5 votes |
/**
 * Stops the stopwatch, adds the elapsed time to the cumulated total, logs both figures for the
 * current document and flushes the occurrence buffer.
 *
 * @param jCas the CAS that was just processed
 */
@Override
protected void afterRuleProcessing(JCas jCas) {
    this.sw.stop();

    // The stopwatch is stopped, so one reading serves both the total and the log line.
    final long elapsedMillis = this.sw.elapsed(TimeUnit.MILLISECONDS);
    totalTimeInMillis.addAndGet(elapsedMillis);

    LOGGER.debug("Processed MWT spotting on doc {} in {}ms [Cumulated: {}ms]",
            JCasUtils.getSourceDocumentAnnotation(jCas).get().getUri(),
            elapsedMillis,
            totalTimeInMillis.get());

    flushOccurrenceBuffer(jCas);
}
Example 25
Source Project: ambiverse-nlu Source File: UiucWikiLocationsRedirects.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code UiucWikiLocationsRedirects} in the given JCas, sets its begin and end
 * offsets, and then calls {@code readObject()}.
 *
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public UiucWikiLocationsRedirects(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}
Example 26
Source Project: uima-uimaj Source File: Sentence.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code Sentence} in the given JCas, sets its begin and end offsets, and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Sentence(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}
Example 27
Source Project: bluima Source File: Size.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code Size} in the given JCas, sets its begin and end offsets, and then calls
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Size(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}
Example 28
Source Project: uima-uimaj Source File: IndexRepositoryTest.java License: Apache License 2.0 | 5 votes |
public void testMissingSofaRef() throws Exception { JCas jcas = cas.getJCas(); Annotation a = new Annotation(jcas, 0, 4); FeatureImpl feat = (FeatureImpl) cas.getTypeSystem().getType(CAS.TYPE_NAME_ANNOTATION_BASE) .getFeatureByBaseName(CAS.FEATURE_BASE_NAME_SOFA); a._setFeatureValueNcNj(feat, null); try { jcas.addFsToIndexes(a); } catch (CASRuntimeException e) { assertEquals("SOFAREF_NOT_SET", e.getMessageKey()); return; } fail("required exception not thrown"); // fail }
Example 29
Source Project: baleen Source File: AbstractMetadataRegexAnnotator.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code Metadata} annotation with the configured {@code key} and the converted
 * text of the matcher group selected by {@code valueGroup}.
 *
 * @param jCas the CAS the annotation is created in
 * @param matcher the matcher positioned on the current match
 * @return the new annotation
 */
@Override
protected Metadata create(JCas jCas, Matcher matcher) {
    Metadata metadata = new Metadata(jCas);
    metadata.setKey(key);
    metadata.setValue(convertValue(matcher.group(valueGroup)));
    return metadata;
}
Example 30
Source Project: bluima Source File: RoomNumber.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a {@code RoomNumber} in the given JCas, sets its begin and end offsets, and then
 * calls {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public RoomNumber(JCas jcas, int begin, int end) {
    super(jcas);
    setBegin(begin);
    setEnd(end);
    // NOTE(review): readObject() is not visible here; presumably an initialization hook - confirm.
    readObject();
}