Java Code Examples for org.apache.uima.jcas.tcas.Annotation#setStringValue()

The following examples show how to use org.apache.uima.jcas.tcas.Annotation#setStringValue(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MongoFieldMapping.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the value stored under {@code fieldKey} in a database object onto
 * feature {@code f} of annotation {@code a}, using the setter that matches
 * the given range name.
 * <p>
 * Handled range names are {@code "String"}, {@code "StringArray"},
 * {@code "Integer"}, {@code "Float"} and {@code "Boolean"}; any other name
 * is only logged as a warning. Nothing happens when {@code dbO} has no field
 * named {@code fieldKey}.
 *
 * @param fieldKey
 *            name of the field to read from {@code dbO}
 * @param range
 *            name of the feature's range, selecting the conversion to apply
 * @param a
 *            annotation whose feature is written
 * @param f
 *            feature to set on {@code a}
 * @param dbO
 *            database object the value is read from
 * @param jCas
 *            JCas used to allocate the {@code StringArray} for the
 *            {@code "StringArray"} range
 */
public static void readFieldFromDb(String fieldKey, String range,
        Annotation a, Feature f, BasicDBObject dbO, JCas jCas) {

    // Nothing to copy when the database object lacks the field.
    if (!dbO.containsField(fieldKey)) {
        return;
    }

    switch (range) {
    case "String":
        a.setStringValue(f, dbO.getString(fieldKey));
        break;
    case "StringArray": {
        BasicDBList dbValues = (BasicDBList) dbO.get(fieldKey);
        StringArray strings = new StringArray(jCas, dbValues.size());
        int idx = 0;
        while (idx < dbValues.size()) {
            // each stored element is converted through toString()
            strings.set(idx, dbValues.get(idx).toString());
            idx++;
        }
        a.setFeatureValue(f, strings);
        break;
    }
    case "Integer":
        a.setIntValue(f, dbO.getInt(fieldKey));
        break;
    case "Float":
        // the value is read as a double and narrowed to float
        a.setFloatValue(f, (float) dbO.getDouble(fieldKey));
        break;
    case "Boolean":
        a.setBooleanValue(f, dbO.getBoolean(fieldKey));
        break;
    default:
        LOG.warn("range not supported " + range);
        break;
    }
}
 
Example 2
Source File: CASImpl.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the normalized form of {@code languageCode} in the language feature
 * of the document annotation.
 *
 * @param languageCode
 *          the language code to set; it is passed through
 *          {@link Language#normalize(String)} before being stored
 * @throws CASRuntimeException
 *           with {@code INVALID_BASE_CAS_METHOD} when invoked on the base CAS
 */
@Override
public void setDocumentLanguage(String languageCode) {
  final boolean calledOnBaseCas = (this == this.svd.baseCAS);
  if (calledOnBaseCas) {
    throw new CASRuntimeException(CASRuntimeException.INVALID_BASE_CAS_METHOD, "setDocumentLanguage(String)");
  }

  final Annotation documentAnnotation = getDocumentAnnotation();
  final FeatureImpl langFeature = getTypeSystemImpl().langFeat;
  final String normalizedCode = Language.normalize(languageCode);

  // NOTE(review): judging by the helper names, the annotation may be taken out
  // of the indexes for the duration of the update and added back afterwards -
  // confirm against checkForInvalidFeatureSetting / addbackSingleIfWasRemoved.
  final int langFeatureCode = langFeature.getCode();
  final boolean removedBeforeUpdate = this.checkForInvalidFeatureSetting(
          documentAnnotation, langFeatureCode, this.getAddbackSingle());

  documentAnnotation.setStringValue(getTypeSystemImpl().langFeat, normalizedCode);
  addbackSingleIfWasRemoved(removedBeforeUpdate, documentAnnotation);
}
 
Example 3
Source File: TokenBuilder.java    From uima-uimafit with Apache License 2.0 4 votes vote down vote up
/**
 * Build tokens for the given text, tokens, part-of-speech tags, and word stems.
 * <p>
 * The document text of the JCas is set to {@code aText}. Each line of the tokens string is
 * treated as one sentence: a token annotation is created for every white space separated piece
 * of the line, and a sentence annotation is created spanning from the first to the last token of
 * that line. Part-of-speech tags and stems are assigned to tokens by position, counted across
 * all sentences.
 * 
 * @param aJCas
 *          the JCas to add the Token annotations to
 * @param aText
 *          the text to initialize the {@link JCas} with
 * @param aTokensString
 *          the tokensString must have the same non-white space characters as the text. The
 *          tokensString is used to identify token boundaries using white space - i.e. the only
 *          difference between the 'text' parameter and the 'tokensString' parameter is that the
 *          latter may have more whitespace characters. For example, if the text is "She ran."
 *          then the tokensString might be "She ran ."
 * @param aPosTagsString
 *          the posTagsString should be a space delimited string of part-of-speech tags - one for
 *          each token; may be null
 * @param aStemsString
 *          the stemsString should be a space delimited string of stems - one for each token; may
 *          be null
 * @throws IllegalArgumentException
 *           if part-of-speech tags are given but no part-of-speech feature name is configured,
 *           if stems are given but no stem feature name is configured, or if a token cannot be
 *           found in the text
 */
public void buildTokens(JCas aJCas, String aText, String aTokensString, String aPosTagsString,
        String aStemsString) {
  aJCas.setDocumentText(aText);

  if (aPosTagsString != null && posFeatureName == null) {
    throw new IllegalArgumentException("posTagsString must be null if TokenBuilder is "
            + "not initialized with a feature name corresponding to the part-of-speech "
            + "feature of the token type (assuming your token type has such a feature).");
  }

  if (aStemsString != null && stemFeatureName == null) {
    // This message used to name the part-of-speech feature; it concerns the stem feature.
    throw new IllegalArgumentException("stemsString must be null if TokenBuilder is not "
            + "initialized with a feature name corresponding to the stem feature "
            + "of the token type (assuming your token type has such a feature).");
  }

  // Resolve the optional features on the token type by their base names.
  Feature posFeature = null;
  if (posFeatureName != null) {
    posFeature = aJCas.getTypeSystem().getType(tokenClass.getName())
            .getFeatureByBaseName(posFeatureName);
  }
  Feature stemFeature = null;
  if (stemFeatureName != null) {
    stemFeature = aJCas.getTypeSystem().getType(tokenClass.getName())
            .getFeatureByBaseName(stemFeatureName);
  }

  // Collapse white space around line breaks so that every line is one sentence.
  String tokensString = aTokensString.replaceAll("\\s*\n\\s*", "\n");
  String[] sentenceStrings = tokensString.split("\n");
  String[] posTags = aPosTagsString != null ? aPosTagsString.split("\\s+") : null;
  String[] stems = aStemsString != null ? aStemsString.split("\\s+") : null;

  // offset walks through aText; tokenIndex counts tokens across all sentences so that
  // tags and stems can be looked up by position.
  int offset = 0;
  int tokenIndex = 0;

  for (String sentenceString : sentenceStrings) {
    String[] tokenStrings = sentenceString.trim().split("\\s+");
    List<Annotation> tokenAnnotations = new ArrayList<Annotation>();
    for (String tokenString : tokenStrings) {
      // move the offset up to the beginning of the token
      while (!aText.startsWith(tokenString, offset)) {
        offset++;
        if (offset > aText.length()) {
          throw new IllegalArgumentException(String.format("unable to find string %s",
                  tokenString));
        }
      }

      // add the Token
      int start = offset;
      offset = offset + tokenString.length();
      Annotation token = AnnotationFactory.createAnnotation(aJCas, start, offset, tokenClass);
      tokenAnnotations.add(token);

      // set the stem and part of speech if present
      if (posTags != null) {
        token.setStringValue(posFeature, posTags[tokenIndex]);
      }
      if (stems != null) {
        token.setStringValue(stemFeature, stems[tokenIndex]);
      }
      tokenIndex++;
    }
    // the sentence spans from the first token of the line to the last one
    if (!tokenAnnotations.isEmpty()) {
      int begin = tokenAnnotations.get(0).getBegin();
      int end = tokenAnnotations.get(tokenAnnotations.size() - 1).getEnd();
      AnnotationFactory.createAnnotation(aJCas, begin, end, sentenceClass);
    }
  }
}