org.apache.uima.fit.util.JCasUtil Java Examples

The following examples show how to use org.apache.uima.fit.util.JCasUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FilterCoocurrencesByTriggerword.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Removes from the indexes every {@link Cooccurrence} covered by a {@link Sentence} whose
 * lower-cased text contains no match for {@code TRIGGER_WORDS}.
 *
 * <p>Candidates are collected during the scan and only unindexed once the scan is complete.
 *
 * @param jCas the CAS whose sentences and co-occurrences are inspected
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {

    List<Cooccurrence> discarded = newLinkedList();

    for (Entry<Sentence, Collection<Cooccurrence>> entry : JCasUtil
            .indexCovered(jCas, Sentence.class, Cooccurrence.class)
            .entrySet()) {

        String loweredSentence = entry.getKey().getCoveredText().toLowerCase();
        boolean hasTrigger = TRIGGER_WORDS.matcher(loweredSentence).find();
        if (hasTrigger) {
            // Sentence mentions a trigger word: its co-occurrences are kept.
            continue;
        }
        discarded.addAll(entry.getValue());
    }

    // Second pass: unindex everything that was collected above.
    for (Cooccurrence cooccurrence : discarded) {
        cooccurrence.removeFromIndexes();
    }
}
 
Example #2
Source File: MergeAdjacentTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Creates two adjacent Location annotations and two adjacent Coordinate annotations, runs the
 * annotator with the "types" parameter set to {"Location", "Coordinate"}, and checks the
 * resulting counts plus the covered text and value of the first annotation of each type.
 */
@Test
public void testSubTypesMultiple() throws Exception {
  final String expectedLocation = "London King's Cross";
  final String expectedCoordinate = "30N 15E";

  jCas.setDocumentText("John Smith was seen at London King's Cross 30N 15E");

  // Two locations side by side ...
  Annotations.createLocation(jCas, 23, 29, "London", null);
  Annotations.createLocation(jCas, 30, 42, "King's Cross", null);

  // ... followed by two coordinates side by side.
  Annotations.createCoordinate(jCas, 43, 46, "30N");
  Annotations.createCoordinate(jCas, 47, 50, "15E");

  String[] types = {"Location", "Coordinate"};
  processJCas("types", types);

  // 1 + 1 (presumably one Location plus one Coordinate selected as a Location - TODO confirm)
  assertEquals(2, JCasUtil.select(jCas, Location.class).size());
  assertEquals(1, JCasUtil.select(jCas, Coordinate.class).size());

  Location firstLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(expectedLocation, firstLocation.getCoveredText());
  assertEquals(expectedLocation, firstLocation.getValue());

  Coordinate firstCoordinate = JCasUtil.selectByIndex(jCas, Coordinate.class, 0);
  assertEquals(expectedCoordinate, firstCoordinate.getCoveredText());
  assertEquals(expectedCoordinate, firstCoordinate.getValue());
}
 
Example #3
Source File: ListTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the List annotator (configured with {@code terms} and the {@code LOCATION} type) over a
 * document in which the search term appears twice, after adding a Text annotation that starts
 * at offset 10. Expects exactly one Location, located after offset 10.
 *
 * <p>The engine is destroyed in a {@code finally} block so that it is released even when
 * processing throws or an assertion fails.
 */
@Test
public void testmultipleHitsWithText() throws Exception {

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
  try {
    // the same search term appears multiple times in text...
    jCas.setDocumentText("Hello world, and hello world again.");
    // but then subset using a Text annotation
    new Text(jCas, 10, jCas.getDocumentText().length()).addToIndexes();

    ae.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, Location.class).size());
    Location l = JCasUtil.selectByIndex(jCas, Location.class, 0);
    assertEquals(WORLD, l.getValue());
    assertEquals(WORLD, l.getCoveredText());
    assertTrue(l.getBegin() > 10);
  } finally {
    ae.destroy();
  }
}
 
Example #4
Source File: StructureContentExtractorTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Initialises a {@code TestStructureContentExtractor} with the "contentMappers" parameter set to
 * {"MetaTags"}, processes a null stream, and expects exactly one Metadata annotation whose key
 * equals "baleen:content-mappers" and whose value contains "MetaTags".
 */
@Test
public void testInitializingMapper() throws UIMAException, IOException {
  JCas jCas = JCasSingleton.getJCasInstance();

  BaleenContentExtractor extractor = new TestStructureContentExtractor();
  Map<String, Object> config = new HashMap<>();
  config.put("contentMappers", new String[] {"MetaTags"});
  extractor.initialize(new CustomResourceSpecifier_impl(), config);

  extractor.processStream(null, "source", jCas);

  // Count the metadata entries that record the configured content mappers.
  long matching = 0;
  for (Metadata metadata : JCasUtil.select(jCas, Metadata.class)) {
    if (metadata.getKey().equals("baleen:content-mappers")
        && metadata.getValue().contains("MetaTags")) {
      matching++;
    }
  }
  assertEquals(1, matching);
}
 
Example #5
Source File: MongoParagraph.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one {@code Document} per {@link Paragraph} in the CAS (document id, covered text,
 * source URI, begin and end offsets) and inserts them into {@code paragraphsCollection} in a
 * single {@code insertMany} call. Nothing is inserted when there are no paragraphs.
 *
 * @param jCas the CAS to read paragraphs from
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  String documentId = getUniqueId(jCas);
  List<Document> pending = new ArrayList<>();

  for (Paragraph p : JCasUtil.select(jCas, Paragraph.class)) {
    Document record = new Document();
    DocumentAnnotation da = getDocumentAnnotation(jCas);

    record.append(FIELD_DOCUMENT_ID, documentId);
    record.append(FIELD_CONTENT, p.getCoveredText());
    record.append(FIELD_DOCUMENT_SOURCE, da.getSourceUri());
    record.append(FIELD_BEGIN, p.getBegin());
    record.append(FIELD_END, p.getEnd());

    pending.add(record);
  }

  if (pending.isEmpty()) {
    return;
  }
  paragraphsCollection.insertMany(pending);
}
 
Example #6
Source File: CorefBracketsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs CorefBrackets with "mergeReferents" set to true over the CAS prepared by
 * {@code populateJCasMergeTest} and expects a single ReferenceTarget that is the referent of
 * both the first Location and the first Coordinate.
 */
@Test
public void testExistingReferentsMerge() throws Exception {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(CorefBrackets.class, "mergeReferents", true);

  populateJCasMergeTest(jCas);
  engine.process(jCas);

  int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
  assertEquals(1, targetCount);

  Location location = JCasUtil.selectByIndex(jCas, Location.class, 0);
  Coordinate coordinate = JCasUtil.selectByIndex(jCas, Coordinate.class, 0);
  ReferenceTarget onlyTarget = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);

  // Both annotations must point at the one remaining target, and hence at each other's.
  assertEquals(onlyTarget, location.getReferent());
  assertEquals(onlyTarget, coordinate.getReferent());
  assertEquals(coordinate.getReferent(), location.getReferent());
}
 
Example #7
Source File: OpenNLPParser.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * For each sentence in the JCas that covers at least one WordToken, parses the sentence from
 * its tokens and hands the parse to {@code updatePhraseChunks}.
 *
 * @param jCas the CAS holding the Sentence and WordToken annotations
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);

  for (final Map.Entry<Sentence, List<WordToken>> entry : tokensBySentence.entrySet()) {
    final Collection<WordToken> tokens = entry.getValue();
    if (tokens.isEmpty()) {
      // Nothing to parse for a sentence without tokens.
      continue;
    }

    final Sentence sentence = entry.getKey();
    final Parse parsed = parseSentence(sentence, tokens);
    updatePhraseChunks(jCas, sentence, parsed);
  }
}
 
Example #8
Source File: RelativeDateTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Expects two Temporal annotations, "Last month" and "next month", whose start/stop timestamps
 * are the UTC epoch seconds of 2016-09-01/2016-10-01 and 2016-11-01/2016-12-01 respectively.
 * The document date is supplied by {@code setDocumentDate()}.
 */
@Test
public void testNextLastMonth() throws Exception {
  final long startOfSeptember =
      LocalDate.of(2016, 9, 1).atStartOfDay().toEpochSecond(ZoneOffset.UTC);
  final long startOfOctober =
      LocalDate.of(2016, 10, 1).atStartOfDay().toEpochSecond(ZoneOffset.UTC);
  final long startOfNovember =
      LocalDate.of(2016, 11, 1).atStartOfDay().toEpochSecond(ZoneOffset.UTC);
  final long startOfDecember =
      LocalDate.of(2016, 12, 1).atStartOfDay().toEpochSecond(ZoneOffset.UTC);

  jCas.setDocumentText("Last month was September, and next month is November");
  setDocumentDate();
  processJCas();

  assertEquals(2, JCasUtil.select(jCas, Temporal.class).size());

  Temporal lastMonth = JCasUtil.selectByIndex(jCas, Temporal.class, 0);
  assertEquals("Last month", lastMonth.getCoveredText());
  assertEquals(startOfSeptember, lastMonth.getTimestampStart());
  assertEquals(startOfOctober, lastMonth.getTimestampStop());

  Temporal nextMonth = JCasUtil.selectByIndex(jCas, Temporal.class, 1);
  assertEquals("next month", nextMonth.getCoveredText());
  assertEquals(startOfNovember, nextMonth.getTimestampStart());
  assertEquals(startOfDecember, nextMonth.getTimestampStop());
}
 
Example #9
Source File: MemoryTransportsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Sends a CAS through a MemoryTransportSender whose whitelist contains only the Location class
 * name and checks, via {@code TransportTester}, that the top-level and Location assertions hold
 * and that no Person exists in the received CAS.
 */
@Test
public void testTransportCanFiltersWhitlist() throws UIMAException, IOException {

  ImmutableList<String> whitelist = ImmutableList.of(Location.class.getName());
  AnalysisEngine whitelistingSender =
      createAnalysisEngine(
          SharedMemoryQueueResource.RESOURCE_KEY,
          erd,
          MemoryTransportSender.PARAM_WHITELIST,
          whitelist);
  MemoryTransportReceiver queueReceiver = createReciever();

  TransportTester transport = new TransportTester(whitelistingSender, queueReceiver);
  transport.run();

  transport.assertTopLevel();
  transport.assertLocationMatches();

  // Person is not on the whitelist, so none may be present in the output.
  boolean personPresent = JCasUtil.exists(transport.getOut(), Person.class);
  assertFalse(personPresent);
}
 
Example #10
Source File: WordNetLemmatizer.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * For every WordToken whose part of speech maps to a WordNet POS and whose covered text is
 * found in WordNet, appends a WordLemma carrying the looked-up lemma to the token's lemma array.
 *
 * @param jCas the CAS holding the WordToken annotations
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  for (final WordToken token : JCasUtil.select(jCas, WordToken.class)) {
    int existing = token.getLemmas() == null ? 0 : token.getLemmas().size();
    final String surface = token.getCoveredText();
    final POS pos = WordNetUtils.toPos(token.getPartOfSpeech());
    if (pos == null) {
      continue;
    }

    final Optional<IndexWord> match = wordnet.lookupWord(pos, surface);
    if (!match.isPresent()) {
      continue;
    }

    // Replace the lemma array with one that has a single extra slot at the end;
    // copyExistingLemmas is called to populate it from the token when lemmas already exist.
    FSArray grown = new FSArray(jCas, existing + 1);
    if (existing > 0) {
      copyExistingLemmas(token, grown);
    }
    token.setLemmas(grown);

    // The new lemma goes into the final slot, whose index equals the previous size.
    final WordLemma lemma = new WordLemma(jCas);
    lemma.setLemmaForm(match.get().getLemma());
    token.setLemmas(existing, lemma);
  }
}
 
Example #11
Source File: CustomTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the Custom regex annotator configured with the Person type, {@code DIGIT_REGEX} and
 * case sensitivity enabled over {@code TEXT}, and expects exactly one Person whose covered text
 * and value both equal {@code P123}.
 *
 * <p>The engine is destroyed in a {@code finally} block so that it is released even when
 * processing throws or an assertion fails.
 */
@Test
public void testCaseSensitive() throws Exception {
  AnalysisEngine regexAE =
      AnalysisEngineFactory.createEngine(
          Custom.class,
          Custom.PARAM_TYPE,
          UK_GOV_DSTL_BALEEN_TYPES_COMMON_PERSON,
          Custom.PARAM_PATTERN,
          DIGIT_REGEX,
          Custom.PARAM_CASE_SENSITIVE,
          true);

  try {
    jCas.setDocumentText(TEXT);
    regexAE.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, Person.class).size());

    Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0);
    assertNotNull(p1);
    assertEquals(P123, p1.getCoveredText());
    assertEquals(P123, p1.getValue());
  } finally {
    regexAE.destroy();
  }
}
 
Example #12
Source File: TemplateAnnotatorTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Processes the CAS against the record definition written by
 * {@code createGoodRecordDefinitionWithDefaultAndMissing()} and expects a single, zero-length
 * TemplateField at offset 212 carrying the value "default value", plus one Metadata annotation.
 * The definition file is deleted afterwards regardless of the outcome.
 */
@Test
public void testCreateDefaultFieldAnnotationsMisingWithDefaultValue()
    throws AnalysisEngineProcessException, ResourceInitializationException, IOException {

  final Path definition = createGoodRecordDefinitionWithDefaultAndMissing();
  try {
    processJCas(TemplateAnnotator.PARAM_RECORD_DEFINITIONS_DIRECTORY, tempDirectory.toString());

    final TemplateField onlyField = JCasUtil.selectSingle(jCas, TemplateField.class);
    // Begin and end coincide, so the field covers no text.
    assertEquals(212, onlyField.getBegin());
    assertEquals(212, onlyField.getEnd());
    assertEquals("", onlyField.getCoveredText());
    assertEquals("default value", onlyField.getValue());

    final int metadataCount = JCasUtil.select(jCas, Metadata.class).size();
    assertEquals(1, metadataCount);

  } finally {
    Files.delete(definition);
  }
}
 
Example #13
Source File: TemplateFieldDefinitionAnnotatorTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Processes {@code FIELD_HTML_REGEX} and checks the first TemplateFieldDefinition: offsets 6-90,
 * the HTML-escaped covered text, the name "html", the unescaped regex, no default value and no
 * repeat flag.
 */
@Test
public void annotateFieldWithHtmlRegex()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String escapedMarkup =
      "<<field:html regex=\"/^&lt;([a-z]+)([^&lt;]+)*(?:&gt;(.*)&lt;\\/\\1&gt;|\\s+\\/&gt;)$/\">>";
  final String unescapedRegex = "/^<([a-z]+)([^<]+)*(?:>(.*)<\\/\\1>|\\s+\\/>)$/";

  jCas.setDocumentText(FIELD_HTML_REGEX);
  processJCas();

  TemplateFieldDefinition definition =
      JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);

  assertEquals(6, definition.getBegin());
  assertEquals(90, definition.getEnd());
  assertEquals(escapedMarkup, definition.getCoveredText());
  assertEquals("html", definition.getName());
  assertEquals(unescapedRegex, definition.getRegex());
  assertNull(definition.getDefaultValue());
  assertFalse(definition.getRepeat());
}
 
Example #14
Source File: TemplateAnnotatorTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Processes the CAS against the record definition written by
 * {@code createGoodRecordDefinitionWithRegexDefaultNeeded()} and expects the record assertion
 * {@code assertRecordCoversParas2to4()} to hold, a single zero-length TemplateField at offset
 * 159 carrying the value "horse", and two Metadata annotations. The definition file is deleted
 * afterwards regardless of the outcome.
 */
@Test
public void testCreateFieldAnnotationsFromSelectorFileWithRegexDefaultUsed()
    throws AnalysisEngineProcessException, ResourceInitializationException, IOException {

  final Path definition = createGoodRecordDefinitionWithRegexDefaultNeeded();
  try {
    processJCas(TemplateAnnotator.PARAM_RECORD_DEFINITIONS_DIRECTORY, tempDirectory.toString());

    assertRecordCoversParas2to4();

    final TemplateField onlyField = JCasUtil.selectSingle(jCas, TemplateField.class);
    // Begin and end coincide, so the field covers no text.
    assertEquals(159, onlyField.getBegin());
    assertEquals(159, onlyField.getEnd());
    assertEquals("", onlyField.getCoveredText());
    assertEquals("horse", onlyField.getValue());

    final int metadataCount = JCasUtil.select(jCas, Metadata.class).size();
    assertEquals(2, metadataCount);

  } finally {
    Files.delete(definition);
  }
}
 
Example #15
Source File: CorefBracketsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Starts with a Location without a referent and a Coordinate that already refers to a
 * ReferenceTarget, runs CorefBrackets, and expects a single ReferenceTarget that is the referent
 * of both annotations.
 */
@Test
public void testExistingCoordReferent() throws Exception {
  AnalysisEngine engine = AnalysisEngineFactory.createEngine(CorefBrackets.class);

  jCas.setDocumentText(LOC_TEXT);

  ReferenceTarget preexistingTarget = Annotations.createReferenceTarget(jCas);

  Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null);
  Coordinate seededCoordinate = Annotations.createCoordinate(jCas, 11, 19, MRGS);
  // Only the coordinate has a referent before processing.
  seededCoordinate.setReferent(preexistingTarget);

  engine.process(jCas);

  int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
  assertEquals(1, targetCount);

  ReferenceTarget onlyTarget = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
  Location location = JCasUtil.selectByIndex(jCas, Location.class, 0);
  Coordinate coordinate = JCasUtil.selectByIndex(jCas, Coordinate.class, 0);

  assertEquals(onlyTarget, location.getReferent());
  assertEquals(onlyTarget, coordinate.getReferent());
  assertEquals(coordinate.getReferent(), location.getReferent());
}
 
Example #16
Source File: DocumentConverterTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Stubs {@code document.sentences()} to return the shared {@code sentence} plus a second
 * sentence spanning offsets 16-36, converts the document into a fresh JCas, and expects two
 * Baleen Sentence annotations at 0-15 and 16-36, in that iteration order.
 */
@Test
public void canConvertSentence() throws UIMAException {
  JCas jCas = JCasFactory.createJCas();
  jCas.setDocumentText("This is a test. This is another test.");

  String[] secondWords = new String[] {"This", "is", "another", "test", "."};
  int[] wordBegins = new int[] {16, 21, 24, 31, 35};
  int[] wordEnds = new int[] {20, 23, 30, 34, 36};
  Sentence secondSentence = new Sentence(secondWords, wordBegins, wordEnds, secondWords);
  when(document.sentences()).thenReturn(new Sentence[] {sentence, secondSentence});

  new DocumentConverter(jCas, document).convert();

  Collection<uk.gov.dstl.baleen.types.language.Sentence> converted =
      JCasUtil.select(jCas, uk.gov.dstl.baleen.types.language.Sentence.class);
  assertEquals(2, converted.size());

  Iterator<uk.gov.dstl.baleen.types.language.Sentence> cursor = converted.iterator();

  uk.gov.dstl.baleen.types.language.Sentence firstAnnotation = cursor.next();
  assertEquals(0, firstAnnotation.getBegin());
  assertEquals(15, firstAnnotation.getEnd());

  uk.gov.dstl.baleen.types.language.Sentence secondAnnotation = cursor.next();
  assertEquals(16, secondAnnotation.getBegin());
  assertEquals(36, secondAnnotation.getEnd());
}
 
Example #17
Source File: TestMistAnalysisEngine.java    From ctakes-docker with Apache License 2.0 6 votes vote down vote up
/**
 * Manual smoke run: pushes one sample sentence through {@code MistAnalysisEngine} (model path
 * "SHARP/model/model"), prints the covered text of every annotation found, then prints the
 * document text of the view named {@code MistAnalysisEngine.DEID_VIEW_NAME}.
 *
 * @param args unused
 * @throws Exception if CAS creation, the pipeline run or the view lookup fails
 */
public static void main(String[] args) throws Exception {

//        TypeSystemDescription tsd = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../desc/TypeSystem.xml");
        JCas jcas = JCasFactory.createJCas();
        jcas.setDocumentText("Patient is a 30-year-old man named Leroy Butler from Green Bay, WI.");

        AnalysisEngineDescription mistDescription = AnalysisEngineFactory.createEngineDescription(
                MistAnalysisEngine.class,
                MistAnalysisEngine.PARAM_MODEL_PATH, "SHARP/model/model");
        SimplePipeline.runPipeline(jcas, mistDescription);

        // Dump everything the pipeline left in the CAS.
        for (Annotation found : JCasUtil.select(jcas, Annotation.class)) {
            String covered = found.getCoveredText();
            System.out.println("Found annotation: " + covered);
        }

        System.out.println("Deidentified version:");
        JCas deidentified = jcas.getView(MistAnalysisEngine.DEID_VIEW_NAME);
        System.out.println(deidentified.getDocumentText());
    }
 
Example #18
Source File: OOVFilter.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the tokens whose covered text is not contained in {@code vocabulary}, records that
 * count in {@code frequency}, and keeps the argument when the count does not exceed
 * {@code THRESHOLD}.
 *
 * @param jCas the CAS holding the Token annotations
 * @return {@code true} when the out-of-vocabulary count is at most {@code THRESHOLD}
 */
@Override
boolean keepArgument(JCas jCas)
{
    int unknownCount = 0;

    for (Token current : JCasUtil.select(jCas, Token.class)) {
        boolean known = vocabulary.contains(current.getCoveredText());
        if (known) {
            continue;
        }
        unknownCount++;
    }

    frequency.addValue(unknownCount);

    return unknownCount <= THRESHOLD;
}
 
Example #19
Source File: SentenceRelationshipAnnotator.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes the entities covered by each language Sentence and by each structure Sentence, merges
 * the two indexes into one keyed by offset via {@code cleanSentencesByOffset}, and adds to the
 * index the relations produced by {@code createMeshedRelations} for each offset's entities.
 *
 * @param jCas the CAS to extract relations from
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  Map<Sentence, List<Entity>> byLanguageSentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);

  Map<uk.gov.dstl.baleen.types.structure.Sentence, List<Entity>> byStructureSentence =
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.structure.Sentence.class, Entity.class);

  Map<Offset, List<Entity>> entitiesByOffset =
      cleanSentencesByOffset(byLanguageSentence, byStructureSentence);

  addRelationsToIndex(
      entitiesByOffset.entrySet().stream()
          .flatMap(
              perOffset ->
                  createMeshedRelations(jCas, perOffset.getValue(), perOffset.getKey())));
}
 
Example #20
Source File: AddTitleToPersonTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Seeds a Person over "John Smith", processes the CAS, and expects the single resulting Person
 * to carry the title "Prime Minister" and to begin where "Prime" starts in the document.
 */
@Test
public void testThree() throws AnalysisEngineProcessException, ResourceInitializationException {
  final String name = "John Smith";
  jCas.setDocumentText("They refered to him as Prime Minister John Smith");

  Person seeded = new Person(jCas);
  final int nameStart = jCas.getDocumentText().indexOf(name);
  seeded.setBegin(nameStart);
  seeded.setEnd(nameStart + name.length());
  seeded.addToIndexes();

  processJCas();

  Collection<Person> people = JCasUtil.select(jCas, Person.class);
  assertEquals(1, people.size());

  Person result = people.iterator().next();
  assertEquals("Prime Minister", result.getTitle());
  // The annotation is expected to start at the title.
  assertEquals(jCas.getDocumentText().indexOf("Prime"), result.getBegin());
}
 
Example #21
Source File: CorefCapitalisationAndApostrophe.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Groups every {@link Entity} in the CAS by a key built from its upper-cased type name and its
 * normalised value, joined with "::".
 *
 * @param jCas the CAS to read entities from
 * @return a map from group key to the entities sharing that key; empty when the CAS has no
 *     entities
 */
private Map<String, List<Entity>> processEntities(JCas jCas) {
  Map<String, List<Entity>> groups = new HashMap<>();

  for (Entity entity : JCasUtil.select(jCas, Entity.class)) {
    String value = normalizeValue(getEntityValue(entity));
    String key = entity.getType().getName().toUpperCase() + "::" + value;

    // Single lookup: creates the group on first sight of the key, then appends to it.
    groups.computeIfAbsent(key, k -> new ArrayList<>()).add(entity);
  }
  return groups;
}
 
Example #22
Source File: SurnameTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Seeds two Person annotations ("Mr Simon Brown" and "Mr Peter Brown"), processes the CAS, and
 * expects three Person annotations: the two seeded ones at indexes 0 and 2 and a bare "Brown"
 * at index 1 that has no referent.
 */
@Test
public void testMultiplePersonNoReferences() throws Exception {
  jCas.setDocumentText(
      "Mr Simon Brown, was caught stealing sausages. Brown was found guilty. Mr Peter Brown was acquitted.");

  Person simon = new Person(jCas, 0, 14);
  simon.addToIndexes();

  Person peter = new Person(jCas, 70, 84);
  peter.addToIndexes();

  processJCas();

  int personCount = JCasUtil.select(jCas, Person.class).size();
  assertEquals(3, personCount);

  Person firstPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals("Mr Simon Brown", firstPerson.getCoveredText());

  Person lastPerson = JCasUtil.selectByIndex(jCas, Person.class, 2);
  assertEquals("Mr Peter Brown", lastPerson.getCoveredText());

  Person middlePerson = JCasUtil.selectByIndex(jCas, Person.class, 1);
  assertEquals("Brown", middlePerson.getCoveredText());

  // The bare surname must not have been linked to either full name.
  assertNull(middlePerson.getReferent());
}
 
Example #23
Source File: AddTitleToPersonTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Seeds a Person over "Col John Smith", processes the CAS, and expects the single resulting
 * Person to carry the title "Senator Col" and to cover "Senator Col John Smith".
 */
@Test
public void testExistingMixed()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String seededText = "Col John Smith";
  jCas.setDocumentText("They refered to him as Senator Col John Smith");

  Person seeded = new Person(jCas);
  final int seededStart = jCas.getDocumentText().indexOf(seededText);
  seeded.setBegin(seededStart);
  seeded.setEnd(seededStart + seededText.length());
  seeded.addToIndexes();

  processJCas();

  Collection<Person> people = JCasUtil.select(jCas, Person.class);
  assertEquals(1, people.size());

  Person result = people.iterator().next();
  assertEquals("Senator Col", result.getTitle());
  assertEquals("Senator Col John Smith", result.getCoveredText());
}
 
Example #24
Source File: JsonJCasConverterTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Serialises the tester's input CAS with a default converter and deserialises it with a
 * converter whose blacklist contains Person, then checks the top-level and Location assertions
 * and that no Person exists in the output CAS.
 */
@Test
public void testDeserializeBlacklist() throws IOException, UIMAException {

  List<Class<? extends BaleenAnnotation>> excludedTypes =
      ImmutableList.<Class<? extends BaleenAnnotation>>of(Person.class);

  final JsonJCasConverter writer = createConverter();
  final JsonJCasConverter filteringReader =
      createConverter(Collections.emptyList(), excludedTypes);

  JCasSerializationTester tester = new JCasSerializationTester();

  // Round trip: unfiltered write, blacklisting read.
  final String payload = writer.serialise(tester.getIn());
  filteringReader.deserialise(tester.getOut(), payload);

  tester.assertTopLevel();
  tester.assertLocationMatches();

  boolean personPresent = JCasUtil.exists(tester.getOut(), Person.class);
  assertFalse(personPresent);
}
 
Example #25
Source File: AddTitleToPersonTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Seeds a Person over "Sir John Smith", processes the CAS, and expects the single resulting
 * Person to carry the title "Sir" and to still cover "Sir John Smith".
 */
@Test
public void testSingleExisting()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  final String seededText = "Sir John Smith";
  jCas.setDocumentText("They refered to him as Sir John Smith");

  Person seeded = new Person(jCas);
  final int seededStart = jCas.getDocumentText().indexOf(seededText);
  seeded.setBegin(seededStart);
  seeded.setEnd(seededStart + seededText.length());
  seeded.addToIndexes();

  processJCas();

  Collection<Person> people = JCasUtil.select(jCas, Person.class);
  assertEquals(1, people.size());

  Person result = people.iterator().next();
  assertEquals("Sir", result.getTitle());
  assertEquals("Sir John Smith", result.getCoveredText());
}
 
Example #26
Source File: TOLocationEntity.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Scans consecutive WordToken triples for the part-of-speech pattern VBD, TO, NNP and, for each
 * match, indexes a Location that begins at the NNP token and ends at the offset returned by
 * {@code findNNPEnd}.
 *
 * @param jCas the CAS holding the WordToken annotations
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));

  for (int i = 0; i < tokens.size() - 2; i++) {
    WordToken first = tokens.get(i);
    WordToken second = tokens.get(i + 1);
    WordToken third = tokens.get(i + 2);

    boolean matchesPattern =
        "VBD".equals(first.getPartOfSpeech())
            && "TO".equals(second.getPartOfSpeech())
            && "NNP".equals(third.getPartOfSpeech());
    if (!matchesPattern) {
      continue;
    }

    Location location = new Location(jCas);
    location.setBegin(third.getBegin());
    location.setEnd(findNNPEnd(tokens, i + 2));
    addToJCasIndex(location);
  }
}
 
Example #27
Source File: BmeowTypeAnnotator.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a BMEOW type dictionary from the Entity annotations present in the CAS: each
 * annotation's ID is prefixed with {@code Constants.YAGO_KB_IDENTIFIER} and ":" to form a KB
 * identifier, which is resolved to an internal id through {@code DataAccess}.
 *
 * @param jCas the CAS holding the Entity annotations
 * @return the dictionary returned by {@code getDicitionaryForEntities} for the collected
 *     entities
 * @throws EntityLinkingDataAccessException declared by this method; presumably raised by the
 *     data-access lookups - TODO confirm
 */
private static BmeowTypeDictionary constructBmeowTypeDictionaryFromGoldStandard(JCas jCas) throws EntityLinkingDataAccessException {
	Entities goldEntities = new Entities();

	for (de.mpg.mpi_inf.ambiversenlu.nlu.entitylinking.uima.type.Entity annotation
			: JCasUtil.select(jCas, de.mpg.mpi_inf.ambiversenlu.nlu.entitylinking.uima.type.Entity.class)) {
		String kbId = Constants.YAGO_KB_IDENTIFIER + ":" + annotation.getID();
		KBIdentifiedEntity identified = new KBIdentifiedEntity(kbId);
		goldEntities.add(new Entity(identified, DataAccess.getInternalIdForKBEntity(identified)));
	}

	return getDicitionaryForEntities(goldEntities);
}
 
Example #28
Source File: MongoStemmingTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Processes a sentence with the Mongo-backed annotator configured for the Buzzword type and
 * expects a single Buzzword whose value and covered text are both "entered the room".
 */
@Test
public void testMultipleWords() throws Exception {
  final String expectedPhrase = "entered the room";

  jCas.setDocumentText("Bill and Ben entered the room on a dark and windy night.");
  processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD);

  int buzzwordCount = JCasUtil.select(jCas, Buzzword.class).size();
  assertEquals(1, buzzwordCount);

  Buzzword onlyBuzzword = JCasUtil.selectByIndex(jCas, Buzzword.class, 0);
  assertEquals(expectedPhrase, onlyBuzzword.getValue());
  assertEquals(expectedPhrase, onlyBuzzword.getCoveredText());
}
 
Example #29
Source File: MergeAdjacentQuantitiesTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates three weight quantities that all use the unit "kg", processes the CAS, and expects
 * all three Quantity annotations to still be present afterwards.
 */
@Test
public void testSameUnit() throws Exception {
  jCas.setDocumentText("The packages weighed: 4kg 2.5kg 14.2kg");

  // Three quantities in a row, all in kilograms.
  Annotations.createWeightQuantity(jCas, 22, 25, "4kg", 4, "kg", 4);
  Annotations.createWeightQuantity(jCas, 26, 31, "2.5kg", 2.5, "kg", 2.5);
  Annotations.createWeightQuantity(jCas, 32, 38, "14.2kg", 14.2, "kg", 14.2);

  processJCas();

  int quantityCount = JCasUtil.select(jCas, Quantity.class).size();
  assertEquals(3, quantityCount);
}
 
Example #30
Source File: BlueCasUtil.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a {@link Header} carrying the given document id to the CAS.
 *
 * @param jCas the CAS to attach the header to; must not already contain a Header
 * @param docId the document id, stored on the header in its decimal string form
 * @return the same {@code jCas}, to allow call chaining
 * @throws IllegalArgumentException if the CAS already contains a Header
 */
public static JCas setDocId(JCas jCas, int docId) {
    if (JCasUtil.exists(jCas, Header.class)) {
        // Say what went wrong and for which id, instead of a bare exception.
        throw new IllegalArgumentException(
                "JCas already contains a Header; cannot set docId " + docId);
    }
    Header header = new Header(jCas);
    header.setDocId(String.valueOf(docId));
    header.addToIndexes();
    return jCas;
}