Java Code Examples for org.apache.uima.fit.util.JCasUtil#indexCovering()

The following examples show how to use org.apache.uima.fit.util.JCasUtil#indexCovering(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DictionariesExtractor.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one boolean {@link Feature} per dictionary registered for the document language. A
 * feature is {@code true} when the first token covered by {@code unit} has an entry in the
 * covering index whose {@code getDictionary()} value equals that dictionary.
 *
 * @param jcas the document being processed
 * @param unit the classification target whose first covered token is inspected
 * @return one feature per dictionary of the document language
 * @throws TextClassificationException declared in the signature; not raised directly here
 */
public Set<Feature> extract(JCas jcas, TextClassificationTarget unit) throws TextClassificationException {
    // The token -> covering dictionary annotation index is rebuilt whenever
    // isTheSameDocument(jcas) reports false; otherwise the cached index is reused.
    if (!isTheSameDocument(jcas)) {
        logger.trace("Building index of covering dictionaries annotations...");
        dictionaryMap = JCasUtil.indexCovering(jcas, Token.class, DictionaryFeatureAnnotation.class);
    }

    // Only the first token inside the target is considered.
    Token firstToken = JCasUtil.selectCovered(jcas, Token.class, unit).iterator().next();

    // Names of every dictionary that annotated a span covering that token.
    Set<String> matchedDictionaries = dictionaryMap.get(firstToken)
            .stream()
            .map(annotation -> annotation.getDictionary())
            .collect(Collectors.toSet());

    return dictionaries.get(jcas.getDocumentLanguage())
            .stream()
            .map(name -> new Feature(name, matchedDictionaries.contains(name)))
            .collect(Collectors.toSet());
}
 
Example 2
Source File: Coreference.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Converts every {@link Entity} in the JCas to a row via {@code convertEntityToRow} and hands
 * each non-empty row to {@code write}.
 *
 * @param jCas the document whose entities are written out
 */
@Override
protected void write(JCas jCas) {

  final String source = getDocumentAnnotation(jCas).getSourceUri();

  // Pre-compute the lookups needed by convertEntityToRow, covering all entities, sentences and
  // word tokens in the document.
  final Map<Entity, List<Sentence>> sentencesByEntity =
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class);
  final Map<Sentence, List<Entity>> entitiesBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);
  final Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);
  final Map<WordToken, List<Entity>> entitiesByToken =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  JCasUtil.select(jCas, Entity.class).stream()
      .map(
          entity ->
              convertEntityToRow(
                  source,
                  sentencesByEntity,
                  entitiesBySentence,
                  tokensBySentence,
                  entitiesByToken,
                  entity))
      // Rows of length zero are dropped rather than written.
      .filter(row -> row.length > 0)
      .forEach(this::write);
}
 
Example 3
Source File: ProperNounInformationCollector.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Collects one {@link EntityInformation} per reference target, restricted to those mentions that
 * contain at least one word token whose part of speech is {@code "NNP"}.
 *
 * @param jCas the document to read from
 * @param clazz the entity type to collect
 * @return an entity information per reference target, carrying the proper-noun mentions and the
 *     sentences covering any of the target's mentions
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, List<Sentence>> sentencesByMention = JCasUtil.indexCovering(jCas, clazz, Sentence.class);
  Map<T, List<WordToken>> tokensByMention = JCasUtil.indexCovered(jCas, clazz, WordToken.class);

  Set<EntityInformation<T>> infos = new HashSet<>();
  mentionsByTarget
      .asMap()
      .forEach(
          (target, mentions) -> {
            // Sentences are gathered from all mentions, not only the proper-noun ones.
            Collection<Sentence> sentences =
                mentions.stream()
                    .flatMap(mention -> sentencesByMention.get(mention).stream())
                    .collect(Collectors.toSet());

            List<T> properNouns =
                mentions.stream()
                    .filter(
                        mention ->
                            tokensByMention.get(mention).stream()
                                .map(token -> token.getPartOfSpeech())
                                .anyMatch(pos -> "NNP".equals(pos)))
                    .collect(toList());

            infos.add(new EntityInformation<T>(target, properNouns, sentences));
          });

  return infos;
}
 
Example 4
Source File: JCasInformationCollector.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Collects one {@link EntityInformation} per reference target, holding all of the target's
 * mentions together with the sentences that cover any of them.
 *
 * @param jCas the document to read from
 * @param clazz the entity type to collect
 * @return an entity information per reference target
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> mentionsByTarget = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, List<Sentence>> sentencesByMention = JCasUtil.indexCovering(jCas, clazz, Sentence.class);

  Set<EntityInformation<T>> infos = new HashSet<>();
  mentionsByTarget
      .asMap()
      .forEach(
          (target, mentions) -> {
            // Union of the sentences covering each mention of this target.
            Collection<Sentence> sentences =
                mentions.stream()
                    .flatMap(mention -> sentencesByMention.get(mention).stream())
                    .collect(Collectors.toSet());

            infos.add(new EntityInformation<T>(target, mentions, sentences));
          });

  return infos;
}
 
Example 5
Source File: RemoveInteractionInEntities.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Removes from the JCas index every {@link Interaction} that appears as a key of the
 * Interaction-to-covering-Entity index.
 *
 * @param jCas the document to clean up
 * @throws AnalysisEngineProcessException declared in the signature; not raised directly here
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  // Only the keys matter here; the covering entities themselves are left untouched.
  // NOTE(review): presumably the index only has keys for interactions lying inside at least one
  // entity - confirm against the uimaFIT version in use.
  removeFromJCasIndex(JCasUtil.indexCovering(jCas, Interaction.class, Entity.class).keySet());
}
 
Example 6
Source File: PartOfSpeechRelationshipAnnotator.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code processSentence} once per sentence, passing the sentence's sorted word tokens and
 * an index from each word token to the entities covering it.
 *
 * @param jCas the document to extract from
 * @throws AnalysisEngineProcessException declared in the signature; not raised directly here
 */
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // For each word token, the entities whose span covers it.
  Map<WordToken, List<Entity>> coveringEntities =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  // For each sentence, the word tokens it contains.
  Map<Sentence, List<WordToken>> tokensBySentence =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);

  for (Map.Entry<Sentence, List<WordToken>> entry : tokensBySentence.entrySet()) {
    processSentence(jCas, entry.getKey(), sort(entry.getValue()), coveringEntities);
  }
}
 
Example 7
Source File: UbmreConstituent.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares per-document state before extraction: runs the superclass hook, then builds the parse
 * tree and the index from each {@link Interaction} to the word tokens covering it.
 *
 * @param jCas the document about to be processed
 */
@Override
protected void preExtract(JCas jCas) {
  super.preExtract(jCas);

  // Both structures are rebuilt on every call.
  this.parseTree = ParseTree.build(jCas);
  this.interactionCoveringTokens =
      JCasUtil.indexCovering(jCas, Interaction.class, WordToken.class);
}
 
Example 8
Source File: TextBlocks.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Adds {@link Text} annotations to the JCas. With no {@link Structure} annotations the whole
 * document text becomes a single text block. Otherwise a text block is created for each
 * structure whose class is in {@code structuralClasses}, and overlapping blocks are then pruned
 * according to {@code keepSmallest}.
 *
 * @param jCas the document to annotate
 * @throws AnalysisEngineProcessException declared in the signature; not raised directly here
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {

  final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class);

  if (structures.isEmpty()) {
    // No structural annotations at all: mark the entire text as one text block.
    addToJCasIndex(new Text(jCas, 0, jCas.getDocumentText().length()));
    return;
  }

  // Create a text block for each structure of a wanted type.
  structures.stream()
      .filter(structure -> structuralClasses.contains(structure.getClass()))
      .map(structure -> new Text(jCas, structure.getBegin(), structure.getEnd()))
      .forEach(this::addToJCasIndex);

  // Now remove any that cover others, so we keep only biggest/most detailed as per request.
  final Map<Text, List<Text>> related =
      keepSmallest
          ? JCasUtil.indexCovering(jCas, Text.class, Text.class)
          : JCasUtil.indexCovered(jCas, Text.class, Text.class);

  // An annotation may be reported as covering itself (potential bug introduced in
  // UIMAfit 2.3.0), so drop each key from its own list before removing anything.
  for (final Map.Entry<Text, List<Text>> entry : related.entrySet()) {
    entry.getValue().remove(entry.getKey());
  }

  related.values().stream().flatMap(Collection::stream).forEach(this::removeFromJCasIndex);
}
 
Example 9
Source File: DocumentFactory.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Construct the document factory for the given jCas and {@link SentenceFactory}.
 *
 * <p>Delegates to the four-argument constructor, supplying an index from each {@link Entity} to
 * the {@link Sentence} annotations covering it (built with {@code JCasUtil.indexCovering}) and
 * the referent map produced by {@code ReferentUtils.createReferentMap} for {@link Entity}.
 *
 * @param jCas to base the document on
 * @param sentenceFactory to use
 */
public DocumentFactory(JCas jCas, SentenceFactory sentenceFactory) {
  this(
      jCas,
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class),
      ReferentUtils.createReferentMap(jCas, Entity.class),
      sentenceFactory);
}
 
Example 10
Source File: SentenceFactory.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Construct the sentence factory for the given jCas.
 *
 * <p>Delegates to the four-argument constructor with indexes built from the jCas, in this order:
 * word tokens covered by each sentence, entities covering each word token, phrase chunks
 * covering each word token, and dependencies covered by each sentence.
 *
 * @param jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(jCas, Sentence.class, Dependency.class));
}
 
Example 11
Source File: CsvEvent.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Converts every {@link Event} in the JCas to a row via {@code extracted} and hands each
 * non-empty row to {@code write}.
 *
 * @param jCas the document whose events are written out
 */
@Override
protected void write(JCas jCas) {
  final String source = getDocumentAnnotation(jCas).getSourceUri();

  // For each event, the sentences whose span covers it.
  final Map<Event, List<Sentence>> sentencesByEvent =
      JCasUtil.indexCovering(jCas, Event.class, Sentence.class);

  JCasUtil.select(jCas, Event.class).stream()
      .map(event -> extracted(source, sentencesByEvent, event))
      // Rows of length zero are dropped rather than written.
      .filter(row -> row.length > 0)
      .forEach(this::write);
}
 
Example 12
Source File: CsvRelation.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one row per {@link Relation} in the JCas. Each row holds the document source, the text
 * of the first covering sentence (empty when none covers the relation), the relation type and
 * sub type, the normalized values and covered texts of source and target, their type short
 * names, and the confidence.
 *
 * @param jCas the document whose relations are written out
 */
@Override
protected void write(JCas jCas) {
  final String source = getDocumentAnnotation(jCas).getSourceUri();

  // For each relation, the sentences whose span covers it.
  final Map<Relation, List<Sentence>> sentencesByRelation =
      JCasUtil.indexCovering(jCas, Relation.class, Sentence.class);

  JCasUtil.select(jCas, Relation.class).stream()
      .map(
          relation -> {
            final List<Sentence> covering = sentencesByRelation.get(relation);
            // Only the first covering sentence is reported.
            final String sentence =
                covering.isEmpty() ? "" : covering.iterator().next().getCoveredText();

            return new String[] {
              source,
              sentence,
              relation.getRelationshipType(),
              relation.getRelationSubType(),
              normalize(relation.getSource().getValue()),
              normalize(relation.getTarget().getValue()),
              normalize(relation.getSource().getCoveredText()),
              normalize(relation.getTarget().getCoveredText()),
              relation.getSource().getType().getShortName(),
              relation.getTarget().getType().getShortName(),
              Double.toString(relation.getConfidence())
            };
          })
      .forEach(this::write);
}
 
Example 13
Source File: ParseTree.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a parse tree from the {@link PhraseChunk} and {@link WordToken} annotations of a JCas.
 *
 * <p>Each chunk becomes a node. A chunk with no covering chunk is a root; otherwise its parent is
 * the covering chunk selected by {@code findSmallest}. Each node then receives the words covered
 * by its chunk, minus the words covered by the chunks of its children.
 *
 * @param jCas the JCas holding the phrase chunk and word token annotations
 * @return the parse tree built from those annotations
 */
public static ParseTree build(JCas jCas) {

  // Phase 1: link every chunk to its parent chunk.

  final Map<PhraseChunk, List<PhraseChunk>> coveringChunks =
      JCasUtil.indexCovering(jCas, PhraseChunk.class, PhraseChunk.class);

  final List<ParseTreeNode> roots = new LinkedList<>();
  final Map<PhraseChunk, ParseTreeNode> chunkToNode = new HashMap<>();

  for (final PhraseChunk chunk : JCasUtil.select(jCas, PhraseChunk.class)) {

    // The node may already exist if this chunk was seen earlier as some other chunk's parent.
    final ParseTreeNode chunkNode = chunkToNode.computeIfAbsent(chunk, ParseTreeNode::new);

    final Collection<PhraseChunk> covering = coveringChunks.get(chunk);
    if (covering != null && !covering.isEmpty()) {
      // Covered by something, so the smallest covering chunk becomes the parent.
      final PhraseChunk parent = findSmallest(covering);
      final ParseTreeNode parentNode = chunkToNode.computeIfAbsent(parent, ParseTreeNode::new);

      chunkNode.setParent(parentNode);
      parentNode.addChild(chunkNode);
    } else {
      // Nothing covers this chunk, so it is a root.
      roots.add(chunkNode);
    }
  }

  // Phase 2: attach words to the nodes.

  final Map<PhraseChunk, List<WordToken>> wordIndex =
      JCasUtil.indexCovered(jCas, PhraseChunk.class, WordToken.class);

  final Map<WordToken, ParseTreeNode> wordToNode = new HashMap<>();

  for (final ParseTreeNode node : chunkToNode.values()) {
    // Keep each node's children in sentence order.
    node.getChildren().sort(SENTENCE_ORDER);

    final Collection<WordToken> coveredWords = wordIndex.get(node.getChunk());
    if (coveredWords == null) {
      continue;
    }

    // Start from every word inside this chunk, then strip those belonging to the children so
    // only the node's own words remain.
    final List<WordToken> ownWords = new ArrayList<>(coveredWords);
    if (node.hasChildren()) {
      node.getChildren().stream()
          .map(child -> wordIndex.get(child.getChunk()))
          .filter(Objects::nonNull)
          .forEach(ownWords::removeAll);
    }

    node.addWords(ownWords);
    for (final WordToken word : ownWords) {
      wordToNode.put(word, node);
    }
  }

  // Roots are returned in sentence order as well.
  roots.sort(SENTENCE_ORDER);

  return new ParseTree(roots, chunkToNode, wordToNode);
}
 
Example 14
Source File: MongoEvents.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Converts every {@link Event} in the JCas to a {@link Document} and inserts the documents into
 * {@code eventsCollection} in a single call. Nothing is inserted when there are no events.
 *
 * @param documentId the identifier stored on each event document
 * @param jCas the document to read events from
 * @param textClass the annotation type whose covering instances provide the event text
 */
private <T extends Base> void saveEvents(String documentId, JCas jCas, Class<T> textClass) {

  // For each event, the textClass annotations whose span covers it.
  final Map<Event, List<T>> coveringText = JCasUtil.indexCovering(jCas, Event.class, textClass);

  List<Document> eventDocuments =
      JCasUtil.select(jCas, Event.class).stream()
          .map(
              event -> {
                // Covered texts of all covering annotations, space separated.
                String text =
                    coveringText.get(event).stream()
                        .map(T::getCoveredText)
                        .collect(Collectors.joining(" "));

                // @formatter:off
                Document document =
                    new Document()
                        .append(FIELD_TEXT, text)
                        .append(FIELD_ENTITIES, getEntityDocuments(event))
                        .append(FIELD_DOCUMENT_ID, documentId)
                        .append(FIELD_TYPES, getEventTypes(event))
                        .append(FIELD_VALUE, event.getValue())
                        .append(FIELD_TOKENS, getEventTokens(event))
                        .append(FIELD_BEGIN, event.getBegin())
                        .append(FIELD_END, event.getEnd())
                        .append(FIELD_CONFIDENCE, event.getConfidence());
                // @formatter:on

                // History is only attached when outputHistory is set.
                if (outputHistory) {
                  HistoryConverter converter =
                      new HistoryConverter(
                          event, fields, getSupport().getDocumentHistory(jCas), getMonitor());
                  document.append(FIELD_HISTORY, converter.convert());
                }

                return document;
              })
          .collect(Collectors.toList());

  if (eventDocuments.isEmpty()) {
    return;
  }
  eventsCollection.insertMany(eventDocuments);
}
 
Example 15
Source File: MongoRelations.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Converts every {@link Relation} in the JCas to a {@link Document} and inserts the documents
 * into {@code relationsCollection} in a single call. Nothing is inserted when there are no
 * relations.
 *
 * @param documentId the identifier stored on each relation document
 * @param jCas the document to read relations from
 */
private void saveRelations(String documentId, JCas jCas) {
  // For each relation, the sentences whose span covers it.
  final Map<Relation, List<Sentence>> sentencesByRelation =
      JCasUtil.indexCovering(jCas, Relation.class, Sentence.class);

  List<Document> relationDocuments =
      JCasUtil.select(jCas, Relation.class).stream()
          .map(
              relation -> {
                // Covered texts of all covering sentences, joined with ". ".
                String sentence =
                    sentencesByRelation.get(relation).stream()
                        .map(covering -> covering.getCoveredText())
                        .collect(Collectors.joining(". "));

                // @formatter:off
                Document document = new Document()
                    .append(fields.getExternalId(), relation.getExternalId())
                    .append(FIELD_RELATIONSHIP_TYPE, relation.getRelationshipType())
                    .append(FIELD_RELATIONSHIP_SUBTYPE, relation.getRelationSubType())
                    .append(FIELD_SOURCE_VALUE, relation.getSource().getValue())
                    .append(FIELD_SOURCE_TYPE, relation.getSource().getType().getShortName())
                    .append(FIELD_SOURCE_TYPE_FULL, relation.getSource().getType().getName())
                    .append(FIELD_VALUE, relation.getValue())
                    .append(FIELD_TARGET_VALUE, relation.getTarget().getValue())
                    .append(FIELD_TARGET_TYPE, relation.getTarget().getType().getShortName())
                    .append(FIELD_TARGET_TYPE_FULL, relation.getTarget().getType().getName())
                    .append(FIELD_SENTENCE, sentence)
                    .append(FIELD_DOCUMENT_ID, documentId)
                    .append(FIELD_SOURCE, relation.getSource().getExternalId())
                    .append(FIELD_TARGET, relation.getTarget().getExternalId())
                    .append(FIELD_BEGIN, relation.getBegin())
                    .append(FIELD_END, relation.getEnd())
                    .append(FIELD_CONFIDENCE, relation.getConfidence())
                    .append(FIELD_SENTENCE_DISTANCE, relation.getSentenceDistance())
                    .append(
                        FIELD_NORMAL_SENTENCE_DISTANCE, normalize(relation.getSentenceDistance()))
                    .append(FIELD_WORD_DISTANCE, relation.getWordDistance())
                    .append(FIELD_NORMAL_WORD_DISTANCE, normalize(relation.getWordDistance()))
                    .append(FIELD_DEPENDENCY_DISTANCE, relation.getDependencyDistance())
                    .append(
                        FIELD_NORMAL_DEPENDENCY_DISTANCE,
                        normalize(relation.getDependencyDistance()));
                // @formatter:on

                return document;
              })
          .collect(Collectors.toList());

  if (relationDocuments.isEmpty()) {
    return;
  }
  relationsCollection.insertMany(relationDocuments);
}
 
Example 16
Source File: AbstractReNounRelationshipAnnotator.java    From baleen with Apache License 2.0 3 votes vote down vote up
/**
 * Runs {@code processTree} for every element supplied by {@code patterns}, sharing one
 * dependency graph and one word-token-to-covering-entities index across all of them.
 *
 * @param jCas the document to extract from
 * @throws AnalysisEngineProcessException declared in the signature; not raised directly here
 */
@Override
protected void extract(JCas jCas) throws AnalysisEngineProcessException {

  // For each word token, the entities whose span covers it.
  Map<WordToken, List<Entity>> coveringEntities =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  DependencyGraph graph = DependencyGraph.build(jCas);

  patterns.get().forEach(pattern -> processTree(pattern, jCas, graph, coveringEntities));
}