Java Code Examples for org.apache.uima.fit.util.JCasUtil#exists()

The following examples show how to use org.apache.uima.fit.util.JCasUtil#exists(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: LocalFeaturesTcAnnotator.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Makes sure the CAS carries a {@code JCasId} and then dispatches to the processing
 * routine selected by {@code featureMode}.
 *
 * @param jcas the CAS to process
 * @throws AnalysisEngineProcessException propagated from the mode-specific processing methods
 */
@Override
public void process(JCas jcas)
        throws AnalysisEngineProcessException
{
    boolean alreadyTagged = JCasUtil.exists(jcas, JCasId.class);
    if (!alreadyTagged) {
        // no id annotation yet: attach one using the running counter
        JCasId casId = new JCasId(jcas);
        casId.setId(jcasId++);
        casId.addToIndexes();
    }

    switch (featureMode) {
        case Constants.FM_DOCUMENT:
        case Constants.FM_PAIR:
            // pair mode is handled exactly like document mode
            processDocument(jcas);
            break;
        case Constants.FM_SEQUENCE:
            processSequence(jcas);
            break;
        case Constants.FM_UNIT:
            processUnit(jcas);
            break;
        default:
            // any other feature mode: nothing to do
            break;
    }
}
 
Example 2
Source File: LocalFeaturesTcAnnotator.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the CAS for document-level processing and passes it to {@code engine}.
 *
 * <p>If the CAS has no {@code TextClassificationTarget}, one spanning the whole document
 * text is added; if it has no {@code TextClassificationOutcome}, one with an empty outcome
 * string is added.
 *
 * @param jcas the CAS to prepare and process
 * @throws AnalysisEngineProcessException wrapping any exception thrown by the engine
 */
private void processDocument(JCas jcas)
        throws AnalysisEngineProcessException
{
    boolean targetMissing = !JCasUtil.exists(jcas, TextClassificationTarget.class);
    if (targetMissing) {
        // the target covers the complete document text
        int documentLength = jcas.getDocumentText().length();
        TextClassificationTarget wholeDocument =
                new TextClassificationTarget(jcas, 0, documentLength);
        wholeDocument.addToIndexes();
    }

    // an outcome annotation has to be present, even if it is only a placeholder
    boolean outcomeMissing = !JCasUtil.exists(jcas, TextClassificationOutcome.class);
    if (outcomeMissing) {
        TextClassificationOutcome placeholder = new TextClassificationOutcome(jcas);
        placeholder.setOutcome("");
        placeholder.addToIndexes();
    }

    // the work is delegated to a separate engine so that its parameter space stays apart;
    // per the original note it is initialized with its own parameters loaded from the model
    try {
        engine.process(jcas);
    }
    catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }
}
 
Example 3
Source File: SparkUimaUtils.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Reads documents through an NYT-format collection reader and appends each one that carries
 * {@code DocumentMetaData} to a sequence file, keyed by its document id.
 *
 * <p>The writer is closed and every CAS is released even when processing fails part-way.
 *
 * @param params parameters forwarded to {@code Reader.getCollectionReaderDescription}
 * @param uri    location of the sequence file to write
 * @throws IOException if a document has no metadata to take the id from, or writing fails
 */
public static void createSequenceFile(Object[] params, String uri)
    throws URISyntaxException, IOException, UIMAException, NoSuchMethodException, MissingSettingException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Path path = new Path(uri);
  Writer writer =
      SequenceFile.createWriter(
          conf, Writer.file(path),
          Writer.keyClass(Text.class),
          Writer.valueClass(SCAS.class));

  // From here on the writer is open: close it on every exit path, not only on success.
  try {
    int count = 0;

    CollectionReaderDescription readerDescription = Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.NYT, params);
    for (JCas jCas : SimplePipelineCasPoolIterator.iteratePipeline(20, readerDescription)) {
      try {
        if (JCasUtil.exists(jCas, DocumentMetaData.class)) {
          ++count;
          // Get the ID.
          DocumentMetaData dmd = JCasUtil.selectSingle(jCas, DocumentMetaData.class);
          // NOTE(review): selectSingle is expected to throw rather than return null;
          // the check is kept as a safeguard.
          if (dmd == null) {
            throw new IOException("No Document ID for xml: " + jCas.getView("xml").getDocumentText());
          }
          Text docIdText = new Text(dmd.getDocumentId());
          SCAS scas = new SCAS(jCas.getCas());
          writer.append(docIdText, scas);
        }
      } finally {
        // Release the CAS even if this document failed.
        jCas.release();
      }
    }
    logger.info("Wrote " + count + " documents to " + uri);
  } finally {
    IOUtils.closeStream(writer);
  }
}
 
Example 4
Source File: DKPro2Tcf.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the part-of-speech tags of the CAS tokens into a newly created POS tags layer of
 * the given text corpus. Returns without creating a layer when the CAS holds no {@code POS}.
 *
 * <p>The tag set name defaults to {@code "STTS"} unless a {@code TagsetDescription} for the
 * POS layer is found in the CAS.
 *
 * @param aJCas                   the CAS to read tokens and POS tags from
 * @param aTextCorpus             the corpus that receives the layer; its tokens layer is
 *                                expected to exist already
 * @param aTokensBeginPositionMap not used by this method
 */
public void writePosTags(JCas aJCas, TextCorpus aTextCorpus,
        Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap)
{
    boolean hasPos = JCasUtil.exists(aJCas, POS.class);
    if (!hasPos) {
        // Nothing to write when the CAS carries no part-of-speech tags
        log.debug("Layer [{}]: empty", TextCorpusLayerTag.POSTAGS.getXmlName());
        return;
    }

    // The tokens layer has to be there already
    TokensLayer tcfTokens = aTextCorpus.getTokensLayer();

    // Determine the tag set name, falling back to the default
    String tagSetName = "STTS";
    String posLayerName = POS.class.getName();
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(posLayerName)) {
            tagSetName = description.getName();
            break;
        }
    }

    PosTagsLayer posLayer = aTextCorpus.createPosTagsLayer(tagSetName);

    log.debug("Layer [{}]: created", TextCorpusLayerTag.POSTAGS.getXmlName());

    // CAS tokens and TCF tokens are matched up by their position in iteration order
    int tokenIndex = 0;
    for (Token casToken : select(aJCas, Token.class)) {
        POS pos = casToken.getPos();
        if (pos != null && posLayer != null) {
            posLayer.addTag(pos.getPosValue(), tcfTokens.getToken(tokenIndex));
        }
        tokenIndex++;
    }
}
 
Example 5
Source File: DKPro2Tcf.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the lemmas of the CAS tokens into a newly created lemmas layer of the given text
 * corpus. Returns without creating a layer when the CAS holds no {@code Lemma}.
 *
 * @param aJCas                   the CAS to read tokens and lemmas from
 * @param aTextCorpus             the corpus that receives the layer; its tokens layer is
 *                                expected to exist already
 * @param aTokensBeginPositionMap not used by this method
 */
public void writeLemmas(JCas aJCas, TextCorpus aTextCorpus,
        Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap)
{
    boolean hasLemmas = JCasUtil.exists(aJCas, Lemma.class);
    if (!hasLemmas) {
        // Nothing to write when the CAS carries no lemmas
        log.debug("Layer [{}]: empty", TextCorpusLayerTag.LEMMAS.getXmlName());
        return;
    }

    // The tokens layer has to be there already
    TokensLayer tcfTokens = aTextCorpus.getTokensLayer();

    // Create the target layer for the lemmas
    LemmasLayer lemmasLayer = aTextCorpus.createLemmasLayer();

    log.debug("Layer [{}]: created", TextCorpusLayerTag.LEMMAS.getXmlName());

    // CAS tokens and TCF tokens are matched up by their position in iteration order
    int tokenIndex = 0;
    for (Token casToken : select(aJCas, Token.class)) {
        Lemma lemma = casToken.getLemma();
        if (lemma != null && lemmasLayer != null) {
            lemmasLayer.addLemma(lemma.getValue(), tcfTokens.getToken(tokenIndex));
        }
        tokenIndex++;
    }
}
 
Example 6
Source File: DKPro2Tcf.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the {@code SofaChangeAnnotation}s of the CAS as corrections into a newly created
 * orthography layer of the given text corpus. Returns without creating a layer when the CAS
 * holds no {@code SofaChangeAnnotation}.
 *
 * <p>For each token only the first change annotation covered by the token is used.
 *
 * @param aJCas       the CAS to read tokens and change annotations from
 * @param aTextCorpus the corpus that receives the layer; its tokens layer is expected to
 *                    exist already
 */
public void writeOrthograph(JCas aJCas, TextCorpus aTextCorpus) {
    boolean hasChanges = JCasUtil.exists(aJCas, SofaChangeAnnotation.class);
    if (!hasChanges) {
        // Nothing to write when there is no SofaChangeAnnotation layer
        // (the counterpart of the TCF orthography layer) in the CAS
        log.debug("Layer [{}]: empty", TextCorpusLayerTag.ORTHOGRAPHY.getXmlName());
        return;
    }

    // The tokens layer has to be there already
    TokensLayer tcfTokens = aTextCorpus.getTokensLayer();

    // Create the target layer for the corrections
    OrthographyLayer orthographyLayer = aTextCorpus.createOrthographyLayer();

    log.debug("Layer [{}]: created", TextCorpusLayerTag.ORTHOGRAPHY.getXmlName());

    // CAS tokens and TCF tokens are matched up by their position in iteration order
    int tokenIndex = 0;
    for (Token token : select(aJCas, Token.class)) {
        List<SofaChangeAnnotation> changes = selectCovered(aJCas, SofaChangeAnnotation.class,
                token.getBegin(), token.getEnd());
        if (!changes.isEmpty() && orthographyLayer != null) {
            SofaChangeAnnotation change = changes.get(0);

            // a missing operation is passed on as null
            orthographyLayer.addCorrection(
                    change.getValue(),
                    tcfTokens.getToken(tokenIndex),
                    Optional.ofNullable(change.getOperation())
                            .map(CorrectionOperation::valueOf)
                            .orElse(null));
        }
        tokenIndex++;
    }
}
 
Example 7
Source File: DKPro2Tcf.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the named entities of the CAS into a newly created named entities layer of the
 * given text corpus. Returns without creating a layer when the CAS holds no
 * {@code NamedEntity}.
 *
 * <p>The tag set name defaults to {@code "BART"} unless a {@code TagsetDescription} for the
 * named entity layer is found in the CAS.
 *
 * @param aJCas                   the CAS to read named entities and tokens from
 * @param aTextCorpus             the corpus that receives the layer
 * @param aTokensBeginPositionMap TCF tokens keyed by the begin offset of the CAS token
 */
public void writeNamedEntity(JCas aJCas, TextCorpus aTextCorpus,
        Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap)
{
    boolean hasEntities = JCasUtil.exists(aJCas, NamedEntity.class);
    if (!hasEntities) {
        // Nothing to write when the CAS carries no named entities
        log.debug("Layer [{}]: empty", TextCorpusLayerTag.NAMED_ENTITIES.getXmlName());
        return;
    }

    // Determine the tag set name, falling back to the default
    String tagSetName = "BART";
    String entityLayerName = NamedEntity.class.getName();
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(entityLayerName)) {
            tagSetName = description.getName();
            break;
        }
    }

    NamedEntitiesLayer namedEntitiesLayer = aTextCorpus.createNamedEntitiesLayer(tagSetName);

    log.debug("Layer [{}]: created", TextCorpusLayerTag.NAMED_ENTITIES.getXmlName());

    for (NamedEntity entity : select(aJCas, NamedEntity.class)) {
        // translate the CAS tokens covered by the entity into their TCF counterparts
        List<eu.clarin.weblicht.wlfxb.tc.api.Token> tcfTokens = new ArrayList<>();
        for (Token covered : selectCovered(aJCas, Token.class, entity.getBegin(),
                entity.getEnd())) {
            tcfTokens.add(aTokensBeginPositionMap.get(covered.getBegin()));
        }
        namedEntitiesLayer.addEntity(entity.getValue(), tcfTokens);
    }
}
 
Example 8
Source File: BlueCasUtil.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a {@code Header} carrying the given document id to the CAS.
 *
 * @param jCas  the CAS to add the header to
 * @param docId the document id, stored in its decimal string form
 * @return the same {@code jCas} that was passed in
 * @throws IllegalArgumentException if the CAS already contains a {@code Header}
 */
public static JCas setDocId(JCas jCas, int docId) {
    if (JCasUtil.exists(jCas, Header.class)) {
        // say what went wrong instead of throwing a message-less exception
        throw new IllegalArgumentException(
                "JCas already contains a Header; refusing to set docId " + docId);
    }
    Header header = new Header(jCas);
    header.setDocId(Integer.toString(docId));
    header.addToIndexes();
    return jCas;
}
 
Example 9
Source File: DKPro2Tcf.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the dependencies of the CAS, grouped by sentence, into a newly created dependency
 * parsing layer of the given text corpus. Returns without creating a layer when the CAS
 * holds no {@code Dependency}.
 *
 * <p>The tag set name defaults to {@code "tiger"} unless a {@code TagsetDescription} for the
 * dependency layer is found in the CAS. Sentences without dependencies add no parse.
 *
 * @param aJCas                   the CAS to read sentences and dependencies from
 * @param aTextCorpus             the corpus that receives the layer
 * @param aTokensBeginPositionMap TCF tokens keyed by the begin offset of the CAS token
 */
public void writeDependency(JCas aJCas, TextCorpus aTextCorpus,
        Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap)
{
    boolean hasDependencies = JCasUtil.exists(aJCas, Dependency.class);
    if (!hasDependencies) {
        // Nothing to write when the CAS carries no dependencies
        log.debug("Layer [{}]: empty", TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName());
        return;
    }

    // Determine the tag set name, falling back to the default
    String tagSetName = "tiger";
    String dependencyLayerName = Dependency.class.getName();
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (description.getLayer().equals(dependencyLayerName)) {
            tagSetName = description.getName();
            break;
        }
    }

    // true as soon as one dependency has a flavor set that is not the basic one
    boolean hasNonBasic = select(aJCas, Dependency.class).stream()
            .anyMatch(dep -> dep.getFlavor() != null
                    && !DependencyFlavor.BASIC.equals(dep.getFlavor()));

    DependencyParsingLayer dependencyParsingLayer =
            aTextCorpus.createDependencyParsingLayer(tagSetName, hasNonBasic, true);

    log.debug("Layer [{}]: created", TextCorpusLayerTag.PARSING_DEPENDENCY.getXmlName());

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        List<eu.clarin.weblicht.wlfxb.tc.api.Dependency> parse = new ArrayList<>();
        for (Dependency casDependency : selectCovered(Dependency.class, sentence)) {
            // dependent and governor are looked up by the begin offset of their CAS tokens
            eu.clarin.weblicht.wlfxb.tc.api.Dependency tcfDependency = dependencyParsingLayer
                    .createDependency(casDependency.getDependencyType(),
                            aTokensBeginPositionMap.get(casDependency.getDependent().getBegin()),
                            aTokensBeginPositionMap.get(casDependency.getGovernor().getBegin()));

            parse.add(tcfDependency);
        }
        if (!parse.isEmpty()) {
            dependencyParsingLayer.addParse(parse);
        }
    }
}