Java Code Examples for org.apache.uima.fit.util.JCasUtil#indexCovered()

The following examples show how to use org.apache.uima.fit.util.JCasUtil#indexCovered(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: OpenNLPParser.java    From baleen with Apache License 2.0 6 votes vote down vote up
@Override
public void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  // Index each sentence in the JCas against the WordTokens it covers, so that the spans can be
  // recreated from our WordTokens.
  final Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);

  for (final Map.Entry<Sentence, List<WordToken>> entry : tokensBySentence.entrySet()) {
    final Collection<WordToken> tokens = entry.getValue();
    if (tokens.isEmpty()) {
      // Sentences which cover no tokens are skipped
      continue;
    }

    // Parse the sentence from its tokens, then update the phrase chunks from the parse
    final Sentence sentence = entry.getKey();
    final Parse parsed = parseSentence(sentence, tokens);
    updatePhraseChunks(jCas, sentence, parsed);
  }
}
 
Example 2
Source File: AbstractInteractionBasedSentenceRelationshipAnnotator.java    From baleen with Apache License 2.0 6 votes vote down vote up
@Override
protected final void extract(JCas jCas) {

  // Per-sentence lookups of the interactions and the entities each sentence covers
  final Map<Sentence, List<Interaction>> interactionsBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Interaction.class);
  final Map<Sentence, List<Entity>> entitiesBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);

  for (final Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
    final Collection<Interaction> interactions = interactionsBySentence.get(sentence);
    final Collection<Entity> entities = entitiesBySentence.get(sentence);

    // A sentence needs at least one interaction and two entities to warrant further work
    if (interactions.isEmpty() || entities.size() < 2) {
      continue;
    }

    final Stream<Relation> found = extract(jCas, sentence, interactions, entities);
    addRelationsToIndex(found);
  }
}
 
Example 3
Source File: SentenceRelationshipAnnotator.java    From baleen with Apache License 2.0 6 votes vote down vote up
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // Entities covered by each Sentence annotation, indexed once per Sentence type
  Map<Sentence, List<Entity>> entitiesByLanguageSentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);
  Map<uk.gov.dstl.baleen.types.structure.Sentence, List<Entity>> entitiesByStructureSentence =
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.structure.Sentence.class, Entity.class);

  // Combine both indexes into a single map keyed by offset
  Map<Offset, List<Entity>> entitiesByOffset =
      cleanSentencesByOffset(entitiesByLanguageSentence, entitiesByStructureSentence);

  // Create the meshed relations for each offset and add them all to the index
  addRelationsToIndex(
      entitiesByOffset.entrySet().stream()
          .flatMap(
              entry -> createMeshedRelations(jCas, entry.getValue(), entry.getKey())));
}
 
Example 4
Source File: DependencyRelationshipAnnotator.java    From baleen with Apache License 2.0 6 votes vote down vote up
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // Entities covered by each language Sentence and by each structure Sentence
  Map<Sentence, List<Entity>> entitiesByLanguageSentence =
      JCasUtil.indexCovered(jCas, uk.gov.dstl.baleen.types.language.Sentence.class, Entity.class);
  Map<uk.gov.dstl.baleen.types.structure.Sentence, List<Entity>> entitiesByStructureSentence =
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.structure.Sentence.class, Entity.class);

  // Combine both indexes into a single map keyed by offset
  Map<Offset, List<Entity>> entitiesByOffset =
      cleanSentencesByOffset(entitiesByLanguageSentence, entitiesByStructureSentence);

  // Create the dependant relations for each offset and add them all to the index
  addRelationsToIndex(
      entitiesByOffset.entrySet().stream()
          .flatMap(
              entry -> createDependantRelations(jCas, entry.getValue(), entry.getKey())));
}
 
Example 5
Source File: Coreference.java    From baleen with Apache License 2.0 6 votes vote down vote up
@Override
protected void write(JCas jCas) {

  final String sourceUri = getDocumentAnnotation(jCas).getSourceUri();

  // Build the lookups between entities, sentences and word tokens once, up front; the same
  // four indexes are handed to convertEntityToRow for every entity.
  final Map<Entity, List<Sentence>> sentencesCoveringEntity =
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class);
  final Map<Sentence, List<Entity>> entitiesInSentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);
  final Map<Sentence, List<WordToken>> tokensInSentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);
  final Map<WordToken, List<Entity>> entitiesCoveringToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  // Convert each entity to a row, drop the zero-length rows and write out the rest
  JCasUtil.select(jCas, Entity.class).stream()
      .map(
          entity ->
              convertEntityToRow(
                  sourceUri,
                  sentencesCoveringEntity,
                  entitiesInSentence,
                  tokensInSentence,
                  entitiesCoveringToken,
                  entity))
      .filter(row -> row.length > 0)
      .forEach(this::write);
}
 
Example 6
Source File: ProperNounInformationCollector.java    From baleen with Apache License 2.0 6 votes vote down vote up
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  // Mentions grouped by reference target, plus the sentences covering and the word tokens
  // covered by each mention
  Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, List<Sentence>> sentencesCovering = JCasUtil.indexCovering(jCas, clazz, Sentence.class);
  Map<T, List<WordToken>> tokensCovered = JCasUtil.indexCovered(jCas, clazz, WordToken.class);

  Set<EntityInformation<T>> result = new HashSet<>();
  for (Map.Entry<ReferenceTarget, Collection<T>> entry : mentionsByTarget.asMap().entrySet()) {
    Collection<T> mentions = entry.getValue();

    // All distinct sentences which cover any mention of this reference target
    Collection<Sentence> sentences =
        mentions.stream()
            .map(sentencesCovering::get)
            .flatMap(Collection::stream)
            .collect(Collectors.toSet());

    // Mentions which cover at least one word token tagged with the part of speech "NNP"
    List<T> properNouns =
        mentions.stream()
            .filter(
                mention ->
                    tokensCovered.get(mention).stream()
                        .anyMatch(token -> "NNP".equals(token.getPartOfSpeech())))
            .collect(toList());

    result.add(new EntityInformation<T>(entry.getKey(), properNouns, sentences));
  }

  return result;
}
 
Example 7
Source File: SimpleEventExtractor.java    From baleen with Apache License 2.0 5 votes vote down vote up
private <T extends Base> void extractEventsFrom(JCas jCas, Collection<T> sections) {

  // Index the locations, temporals and entities covered by each annotation of sectionClass
  Map<? extends Base, List<Location>> locationsBySection =
      JCasUtil.indexCovered(jCas, sectionClass, Location.class);
  Map<? extends Base, List<Temporal>> temporalsBySection =
      JCasUtil.indexCovered(jCas, sectionClass, Temporal.class);
  Map<? extends Base, List<Entity>> entitiesBySection =
      JCasUtil.indexCovered(jCas, sectionClass, Entity.class);

  for (T section : sections) {
    List<Entity> sectionEntities = entitiesBySection.get(section);
    Set<Entity> relevantEntities = filterEntities(sectionEntities);
    List<Location> locations = locationsBySection.get(section);
    List<Temporal> temporals = temporalsBySection.get(section);
    int begin = section.getBegin();
    int end = section.getEnd();

    // Without at least one relevant entity, location and temporal no event is created
    if (relevantEntities.isEmpty() || locations.isEmpty() || temporals.isEmpty()) {
      continue;
    }
    createEvent(jCas, begin, end, relevantEntities, locations, temporals, 0);

    // With more than one of each, a second event is created with the final argument set to 1
    if (relevantEntities.size() > 1 && locations.size() > 1 && temporals.size() > 1) {
      createEvent(jCas, begin, end, relevantEntities, locations, temporals, 1);
    }
  }
}
 
Example 8
Source File: PartOfSpeechRelationshipAnnotator.java    From baleen with Apache License 2.0 5 votes vote down vote up
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // For each word token, the entities which cover it
  Map<WordToken, List<Entity>> entitiesCoveringToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  // For each sentence, the word tokens it covers
  Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);

  // Process every sentence with its tokens passed through sort(...)
  for (Map.Entry<Sentence, List<WordToken>> entry : tokensBySentence.entrySet()) {
    processSentence(jCas, entry.getKey(), sort(entry.getValue()), entitiesCoveringToken);
  }
}
 
Example 9
Source File: DocumentRelationshipAnnotator.java    From baleen with Apache License 2.0 5 votes vote down vote up
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // Entities covered by each Sentence annotation, indexed once per Sentence type
  Map<Sentence, List<Entity>> entitiesByLanguageSentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);
  Map<uk.gov.dstl.baleen.types.structure.Sentence, List<Entity>> entitiesByStructureSentence =
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.structure.Sentence.class, Entity.class);

  // Combine both indexes into a single sorted map keyed by offset
  SortedMap<Offset, List<Entity>> entitiesByOffset =
      cleanSentencesByOffset(entitiesByLanguageSentence, entitiesByStructureSentence);

  // Create the relations from the combined map and add them to the index
  addRelationsToIndex(createRelations(jCas, entitiesByOffset));
}
 
Example 10
Source File: TextBlocks.java    From baleen with Apache License 2.0 5 votes vote down vote up
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {

  final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class);

  if (structures.isEmpty()) {
    // The jCas has no structural annotations, so the entire text is marked as one text block
    addToJCasIndex(new Text(jCas, 0, jCas.getDocumentText().length()));
    return;
  }

  // Otherwise add a text block spanning each structure whose class is one we want
  structures.stream()
      .filter(structure -> structuralClasses.contains(structure.getClass()))
      .map(structure -> new Text(jCas, structure.getBegin(), structure.getEnd()))
      .forEach(this::addToJCasIndex);

  // Now remove any that cover others (keepSmallest) or are covered by others (otherwise), so we
  // keep only the smallest or the biggest blocks as per request
  final Map<Text, List<Text>> cover =
      keepSmallest
          ? JCasUtil.indexCovering(jCas, Text.class, Text.class)
          : JCasUtil.indexCovered(jCas, Text.class, Text.class);

  // Drop each block from its own list, in case it has been pulled out as covering itself
  // (potential bug introduced in UIMAfit 2.3.0)
  for (final Map.Entry<Text, List<Text>> entry : cover.entrySet()) {
    entry.getValue().remove(entry.getKey());
  }

  // Everything still listed is removed from the index
  for (final List<Text> others : cover.values()) {
    for (final Text other : others) {
      removeFromJCasIndex(other);
    }
  }
}
 
Example 11
Source File: SentenceFactory.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Construct the sentence factory for the given jCas.
 *
 * <p>Builds four indexes from the jCas and delegates to the index-based constructor: the word
 * tokens covered by each sentence, the entities covering each word token, the phrase chunks
 * covering each word token, and the dependencies covered by each sentence.
 *
 * @param jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(jCas, Sentence.class, Dependency.class));
}
 
Example 12
Source File: AssignTypeToInteraction.java    From baleen with Apache License 2.0 4 votes vote down vote up
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  Map<Interaction, List<WordToken>> wordsByInteraction =
      JCasUtil.indexCovered(jCas, Interaction.class, WordToken.class);

  // Copy the interactions into a list first; new interactions are added to the index inside
  // the loop below
  Collection<Interaction> originals = new ArrayList<>(JCasUtil.select(jCas, Interaction.class));

  for (Interaction original : originals) {
    String value = original.getCoveredText();
    Collection<WordToken> words = wordsByInteraction.get(original);

    // Both the covered words and the interaction value (ie the text covered by the
    // interaction) are needed to go any further
    if (words == null || words.isEmpty() || value == null || value.isEmpty()) {
      continue;
    }

    // Look for a string match between the interaction value and the words, then build a key
    // for each distinct, non-null part of speech those words carry
    Stream<String> posKeys =
        words.stream()
            .filter(word -> word.getCoveredText().equalsIgnoreCase(value))
            .map(WordToken::getPartOfSpeech)
            .distinct()
            .filter(Objects::nonNull)
            .map(pos -> toKey(pos, value));

    // Looking the key up in the definitions does the POS matching for us; for every
    // definition found a copy of the interaction is created carrying the right type info
    posKeys
        .map(definitions::get)
        .filter(found -> found != null && !found.isEmpty())
        .flatMap(Collection::stream)
        .forEach(
            definition -> {
              Interaction typed =
                  ComparableEntitySpanUtils.copyInteraction(
                      jCas, original.getBegin(), original.getEnd(), original);

              typed.setRelationshipType(definition.getType());
              typed.setRelationSubType(definition.getSubType());

              addToJCasIndex(typed);
            });
  }

  // Delete the old interactions; each has either been replaced or not
  removeFromJCasIndex(originals);
}
 
Example 13
Source File: UbmreDependency.java    From baleen with Apache License 2.0 4 votes vote down vote up
@Override
protected void extract(JCas jCas) {

  // Lookups: interactions covered by each word token, dependencies covered by each entity, and
  // word tokens covered by each interaction
  final Map<WordToken, List<Interaction>> tokenToInteraction =
      JCasUtil.indexCovered(jCas, WordToken.class, Interaction.class);
  final Map<Entity, List<Dependency>> entityToDependency =
      JCasUtil.indexCovered(jCas, Entity.class, Dependency.class);
  final Map<Interaction, List<WordToken>> interactionToTokens =
      JCasUtil.indexCovered(jCas, Interaction.class, WordToken.class);

  final Collection<Entity> entities = JCasUtil.select(jCas, Entity.class);

  // This is the complex part. We are looking to find all entities close to interaction words
  // in 'dependency space'.
  // We allow an entity to traverse the graph until it gets to a verb, then in effect it
  // can go verb to verb. If it wants to go verb to noun that's ok (our interaction word
  // could be a noun) but it can't then go back to a verb (since that would join two
  // disconnected verb trees).

  final Multimap<Interaction, Entity> interactionToEntities = HashMultimap.create();
  for (final Entity entity : entities) {
    dependencyGraph.traverse(
        maxDependencyDistance,
        entityToDependency.getOrDefault(entity, Collections.emptyList()),
        traverseToVerb(tokenToInteraction, interactionToEntities, entity));
  }

  // Now we can create all the relations

  final Stream<Relation> relations =
      interactionToEntities.asMap().entrySet().stream()
          .flatMap(
              e -> {
                final Interaction i = e.getKey();

                // A token with no part of speech set is treated as not being a verb, rather
                // than failing with a NullPointerException
                final boolean interactionIsVerb =
                    interactionToTokens.getOrDefault(i, Collections.emptyList()).stream()
                        .map(WordToken::getPartOfSpeech)
                        .filter(Objects::nonNull)
                        .anyMatch(pos -> pos.startsWith("V"));

                final Collection<Entity> c = e.getValue();
                return createMeshedRelations(jCas, i, c, confidence)
                    .filter(Objects::nonNull)
                    .filter(
                        r ->
                            // Filter applies RD2: if the interaction is a verb then it should
                            // be between the two entities
                            !interactionIsVerb
                                || AnnotationUtils.isInBetween(r, r.getSource(), r.getTarget()));
              });

  addRelationsToIndex(relations);
}
 
Example 14
Source File: ParseTree.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a parse tree from the phrase chunks and word tokens in the jCas.
 *
 * <p>Each phrase chunk becomes a node whose parent is the smallest chunk covering it; chunks
 * with nothing covering them become roots. Each node is then given the words its chunk covers,
 * less the words covered by its children's chunks.
 *
 * @param jCas the jCas to read phrase chunks and word tokens from
 * @return the parse tree
 */
public static ParseTree build(JCas jCas) {

  // Build the tree, phrase to phrase

  final Map<PhraseChunk, List<PhraseChunk>> coveringIndex =
      JCasUtil.indexCovering(jCas, PhraseChunk.class, PhraseChunk.class);

  final List<ParseTreeNode> roots = new LinkedList<>();
  final Map<PhraseChunk, ParseTreeNode> chunkToNode = new HashMap<>();

  for (final PhraseChunk chunk : JCasUtil.select(jCas, PhraseChunk.class)) {
    final ParseTreeNode node = chunkToNode.computeIfAbsent(chunk, ParseTreeNode::new);

    final Collection<PhraseChunk> covering = coveringIndex.get(chunk);
    if (covering == null || covering.isEmpty()) {
      // Nothing covers this chunk, so it is a root
      roots.add(node);
      continue;
    }

    // The chunk is covered, so the smallest covering chunk becomes its parent (a node is
    // created for the parent if it has not been seen yet)
    final ParseTreeNode parentNode =
        chunkToNode.computeIfAbsent(findSmallest(covering), ParseTreeNode::new);
    node.setParent(parentNode);
    parentNode.addChild(node);
  }

  // Add the words to the tree

  final Map<PhraseChunk, List<WordToken>> wordIndex =
      JCasUtil.indexCovered(jCas, PhraseChunk.class, WordToken.class);

  final Map<WordToken, ParseTreeNode> wordToNode = new HashMap<>();

  for (final ParseTreeNode node : chunkToNode.values()) {
    // Put the children of every node into sentence order
    node.getChildren().sort(SENTENCE_ORDER);

    // Start from all the words within this chunk
    final Collection<WordToken> allWords = wordIndex.get(node.getChunk());
    if (allWords == null) {
      continue;
    }
    final List<WordToken> ownWords = new ArrayList<>(allWords);

    // Take away the words covered by the children, leaving just this node's own words
    if (node.hasChildren()) {
      node.getChildren().stream()
          .map(child -> wordIndex.get(child.getChunk()))
          .filter(Objects::nonNull)
          .forEach(ownWords::removeAll);
    }

    // Record the words on the node, and the node against each word
    node.addWords(ownWords);
    for (final WordToken word : ownWords) {
      wordToNode.put(word, node);
    }
  }

  // Sort the roots

  roots.sort(SENTENCE_ORDER);

  return new ParseTree(roots, chunkToNode, wordToNode);
}
 
Example 15
Source File: PostprocessRutaEngine.java    From bluima with Apache License 2.0 4 votes vote down vote up
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {

	// Dictionary terms covered by each chunk, and chunks covered by each BRCooc
	Map<BrainRegionChunk, Collection<BrainRegionDictTerm>> termsByChunk = JCasUtil
			.indexCovered(jCas, BrainRegionChunk.class,
					BrainRegionDictTerm.class);
	Map<BRCooc, Collection<BrainRegionChunk>> chunksByCooc = JCasUtil
			.indexCovered(jCas, BRCooc.class, BrainRegionChunk.class);

	// all BRCoocs, and the BrainRegionChunks each one encloses
	for (Entry<BRCooc, Collection<BrainRegionChunk>> coocEntry : chunksByCooc
			.entrySet()) {
		BRCooc cooc = coocEntry.getKey();
		List<BrainRegionChunk> chunks = newArrayList(coocEntry.getValue());

		if (chunks.size() != 2) {
			// anything other than exactly two chunks is logged and ignored
			LOG.warn("BRCoocs should have exactly 2 chunks, but found "
					+ chunks.size() + " chunks, ignoring");
			continue;
		}

		// all terms from the first and from the second chunk
		Collection<BrainRegionDictTerm> firstTerms = termsByChunk
				.get(chunks.get(0));
		Collection<BrainRegionDictTerm> secondTerms = termsByChunk
				.get(chunks.get(1));

		// create a Cooccurrence for every (first, second) pairing
		for (BrainRegionDictTerm first : firstTerms) {
			stripLeadingThe(first);
			for (BrainRegionDictTerm second : secondTerms) {
				stripLeadingThe(second);
				Cooccurrence created = new Cooccurrence(jCas,
						cooc.getBegin(), cooc.getEnd());
				created.setFirstEntity(first);
				created.setSecondEntity(second);
				created.addToIndexes();
			}
		}
	}
}

/**
 * Moves the begin of the term forward by 4 characters when its covered text
 * starts with "the " or "The ".
 */
// TODO better
private static void stripLeadingThe(BrainRegionDictTerm term) {
	String text = term.getCoveredText();
	if (text.startsWith("the ") || text.startsWith("The ")) {
		term.setBegin(term.getBegin() + 4);
	}
}
 
Example 16
Source File: WordDistributionDocumentSummary.java    From baleen with Apache License 2.0 2 votes vote down vote up
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {

  getMonitor().debug("Running Document Summary Annotator");

  // Sentences against their word tokens, first as annotations and then as strings
  Map<Sentence, List<WordToken>> wordsBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);
  Map<String, List<String>> wordStringsBySentence = getSentenceToWordsStringMap(wordsBySentence);

  // Word frequencies, computed from the word list and its distinct words
  List<String> allWords = getWordList(jCas);
  Set<String> distinctWords = new HashSet<>(allWords);
  Map<String, Integer> frequencies = getWordFrequencies(allWords, distinctWords);

  StringBuilder summaryText = new StringBuilder();
  Set<String> chosenSentences = new LinkedHashSet<>();

  // Keep adding sentences until the summary reaches the desired number of characters
  while (summaryText.length() < desiredSummaryCharacterCount) {

    int wordsAboveThreshold =
        (int) frequencies.values().stream().filter(count -> count > frequencyThreshold).count();

    ListOfOrderedSentencesGenerator<String> generator =
        new ListOfOrderedSentencesGenerator<>(
            wordStringsBySentence, frequencies, wordsAboveThreshold);

    // The first sorted sentence which has not been chosen already
    Optional<String> candidate =
        generator.getSortedSentences().stream()
            .filter(sentence -> !chosenSentences.contains(sentence))
            .findFirst();

    if (!candidate.isPresent()) {
      // No unchosen sentence is left, so stop here
      break;
    }

    String chosen = candidate.get();
    chosenSentences.add(chosen);
    summaryText.append(chosen).append(" \n");

    // Continue with the frequencies as adjusted by the generator for the chosen sentence
    frequencies = generator.adjustWordFrequencies(chosen);
  }

  String summary = summaryText.toString();

  log(summary);

  addSummaryToMetadata(jCas, summary);
}