org.apache.uima.cas.impl.XCASSerializer Java Examples

The following examples show how to use org.apache.uima.cas.impl.XCASSerializer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CreateSampleXCASFile.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a small sample document in a fresh JCas and writes it to
 * {@code src/test/resources/data/docs/test.xcas} in XCAS format.
 *
 * @param args
 *          command line arguments; not used
 * @throws UIMAException
 *           if the JCas cannot be created
 * @throws SAXException
 *           if an error occurs generating the XML text
 * @throws IOException
 *           if the output file cannot be opened or written
 */
public static void main(String[] args) throws UIMAException, SAXException, IOException {
  TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class,
          Sentence.class, "pos", "stem");
  JCas jCas = JCasFactory.createJCas();
  // quote from http://www.gutenberg.org/files/20417/20417-h/20417-h.htm
  String text = "... the more knowledge advances the more it becomes possible to condense it into little books.";
  // NOTE(review): the three trailing strings look like the space-separated tokens followed by
  // the per-token values of the "pos" and "stem" features configured above - confirm against
  // the TokenBuilder documentation.
  tokenBuilder
          .buildTokens(
                  jCas,
                  text,
                  "... the more knowledge advances the more it becomes possible to condense it into little books . ",
                  ". T M K A T M I B P T C I I L B .",
                  "... the more knowledge advance the more it become possible to condense it into little book . ");

  // try-with-resources guarantees the file handle is released even when serialization fails.
  try (FileOutputStream out = new FileOutputStream("src/test/resources/data/docs/test.xcas")) {
    XCASSerializer ser = new XCASSerializer(jCas.getTypeSystem());
    XMLSerializer xmlSer = new XMLSerializer(out, false);
    ser.serialize(jCas.getCas(), xmlSer.getContentHandler());
  }
}
 
Example #2
Source File: CASTransportable.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the start of an XML element to the output stream in XTalk binary form: an element
 * marker, the element name, the attributes, and the child count reported by the serializer.
 * <p>
 * The first element seen is asserted to be the XCAS root tag and is written under the name
 * {@code Constants.KEYS} instead of its own name.
 *
 * @param uri
 *          namespace URI of the element; not used
 * @param name
 *          local name of the element; not used
 * @param qName
 *          qualified name of the element, written as the element name
 * @param atts
 *          attributes of the element
 * @throws SAXException
 *           wrapping any {@link IOException} raised while writing to the stream
 */
public void startElement(String uri, String name, String qName, org.xml.sax.Attributes atts)
        throws SAXException {
  try {
    // Debug.p("Start element: " + qName + " : " + serializer.getNumChildren());
    os.write(XTalkTransporter.ELEMENT_MARKER);
    if (!started) {
      Debug.Assert(XCASSerializer.casTagName.equals(qName));
      started = true;
      // for some reason we have to replace "CAS" with "KEYS" as the CAS root tag.
      XTalkTransporter.stringToBin(Constants.KEYS, os);
    } else {
      XTalkTransporter.stringToBin(qName, os);
    }
    attributesToXTalk(atts);
    // HACK to find out # of children
    XTalkTransporter.writeInt(serializer.getNumChildren(), os);
  } catch (IOException e) {
    throw wrapAsSAXException(e);
  }
}
 
Example #3
Source File: XCasToCasDataSaxHandler.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Handles an element-start event according to the current parser state. The text buffer is
 * reset first in every case.
 *
 * @param nameSpaceURI
 *          namespace URI of the element; not used
 * @param localName
 *          local name of the element; not used
 * @param qualifiedName
 *          qualified name of the element, used for all dispatching
 * @param attrs
 *          attributes of the element
 * @throws SAXException
 *           if the root tag is wrong or an element appears in a state that does not expect one
 */
public void startElement(String nameSpaceURI, String localName, String qualifiedName,
        Attributes attrs) throws SAXException {
  resetBuffer();
  switch (state) {
    case DOC_STATE:
      // The document must open with the XCAS root tag.
      if (!qualifiedName.equals(XCASSerializer.casTagName)) {
        throw createException(XCASParsingException.WRONG_ROOT_TAG, qualifiedName);
      }
      this.state = FS_STATE;
      return;

    case FS_STATE:
      // Each new feature structure starts out with the default content feature.
      this.currentContentFeat = DEFAULT_CONTENT_FEATURE;
      readFS(qualifiedName, attrs);
      return;

    case ARRAY_ELE_STATE:
      readArrayElement(qualifiedName, attrs);
      return;

    default:
      // Any other state is not expecting an element, so report an error.
      throw createException(XCASParsingException.TEXT_EXPECTED, qualifiedName);
  }
}
 
Example #4
Source File: NewPrimitiveTypesTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips the example feature structures through an XCAS file: serializes the CAS to
 * {@code ExampleCas/newprimitives.xcas}, resets the CAS, deserializes the file back into it and
 * validates the restored data.
 *
 * @throws Exception
 *           if serialization, deserialization or file access fails
 */
public void testXCASSerialization() throws Exception {

    // create FS
    createExampleFS(cas);

    // serialize; the stream is closed before the file is read back so that all output has
    // reached the file and the handle is released even if serialization fails
    XCASSerializer ser = new XCASSerializer(cas.getTypeSystem());
    try (OutputStream outputXCAS = new FileOutputStream(JUnitExtension
            .getFile("ExampleCas/newprimitives.xcas"))) {
        XMLSerializer xmlSer = new XMLSerializer(outputXCAS);
        ser.serialize(cas, xmlSer.getContentHandler());
    }

    // reset
    cas.reset();

    // deserialize
    try (InputStream inputXCAS = new FileInputStream(JUnitExtension
            .getFile("ExampleCas/newprimitives.xcas"))) {
        XCASDeserializer.deserialize(inputXCAS, cas, false);
    }

    // check values
    validateFSData(cas);

}
 
Example #5
Source File: XCasToCasDataSaxHandlerTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that every feature structure in the given CAS data has a type known to the type
 * system and that annotation subtypes carry non-negative "begin" and "end" values. The
 * "fake" document text feature structure is skipped.
 *
 * @param casData
 *          the CAS data whose feature structures are checked
 * @param typeSystem
 *          the type system used to resolve the type names
 */
private void assertValidCasData(CasData casData, TypeSystem typeSystem) {
  Type annotType = typeSystem.getType(CAS.TYPE_NAME_ANNOTATION);
  Iterator<FeatureStructure> fsIter = casData.getFeatureStructures();
  while (fsIter.hasNext()) {
    org.apache.uima.cas_data.FeatureStructure fs = fsIter.next();
    String typeName = fs.getType();

    // don't do tests on the "fake" document text FS
    if (XCASSerializer.DEFAULT_DOC_TYPE_NAME.equals(typeName)) {
      continue;
    }

    Type type = typeSystem.getType(typeName);
    Assert.assertNotNull(type);
    if (typeSystem.subsumes(annotType, type)) {
      // annotation type - check for presence of begin/end
      FeatureValue beginVal = fs.getFeatureValue("begin");
      Assert.assertTrue(beginVal instanceof PrimitiveValue);
      Assert.assertTrue(((PrimitiveValue) beginVal).toInt() >= 0);
      FeatureValue endVal = fs.getFeatureValue("end");
      Assert.assertTrue(endVal instanceof PrimitiveValue);
      Assert.assertTrue(((PrimitiveValue) endVal).toInt() >= 0);
    }
  }
}
 
Example #6
Source File: XCasToCasDataSaxHandler.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the start tag of an array element and switches the parser to the state that
 * collects the element's content.
 *
 * @param eleName
 *          name of the element that was opened
 * @param attrs
 *          attributes of the element; none are allowed
 * @throws SAXParseException
 *           if the element is not an array element tag or carries attributes
 */
private void readArrayElement(String eleName, Attributes attrs) throws SAXParseException {
  final boolean isArrayElementTag = eleName.equals(XCASSerializer.ARRAY_ELEMENT_TAG);
  if (!isArrayElementTag) {
    throw createException(XCASParsingException.ARRAY_ELE_EXPECTED, eleName);
  }

  final boolean hasAttributes = attrs.getLength() > 0;
  if (hasAttributes) {
    throw createException(XCASParsingException.ARRAY_ELE_ATTRS);
  }

  // The tag is valid: from here on the element's text content is expected.
  this.state = ARRAY_ELE_CONTENT_STATE;
  resetBuffer();
}
 
Example #7
Source File: XCasToCasDataSaxHandler.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the attributes of a feature structure element onto {@code fsImpl}.
 * <p>
 * Attributes starting with the reserved prefix are checked against the id, content and
 * indexed attribute names; everything else, including unrecognized reserved-prefix
 * attributes, is passed to {@code handleFeature}. Afterwards the parser always moves to
 * the content state.
 *
 * @param fsImpl
 *          the feature structure being populated
 * @param attrs
 *          the attributes of the element
 * @throws SAXParseException
 *           if an attribute cannot be processed
 */
private void readFS(FeatureStructureImpl fsImpl, Attributes attrs) throws SAXParseException {
  for (int idx = 0; idx < attrs.getLength(); idx++) {
    final String name = attrs.getQName(idx);
    final String value = attrs.getValue(idx);

    // Anything without the reserved prefix is an ordinary feature.
    if (!name.startsWith(reservedAttrPrefix)) {
      handleFeature(fsImpl, name, value);
      continue;
    }

    if (name.equals(XCASSerializer.ID_ATTR_NAME)) {
      fsImpl.setId(value);
      continue;
    }

    if (name.equals(XCASSerializer.CONTENT_ATTR_NAME)) {
      this.currentContentFeat = value;
      continue;
    }

    if (name.equals(XCASSerializer.INDEXED_ATTR_NAME)) {
      if (value.equals(XCASSerializer.TRUE_VALUE)) {
        // Backwards compatible CAS, has one default text Sofa
        fsImpl.setIndexed(new int[] { 1 });
      } else if (!value.equals("false")) {
        fsImpl.setIndexed(parseIntArray(value));
      }
      continue;
    }

    // Reserved prefix but not a known reserved name: treat it as a feature as well.
    handleFeature(fsImpl, name, value);
  }

  // APL - 6/28/04 - even if the _content attr is not specified, we can still have content,
  // which would be assigned to the "value" feature, as per XCAS spec. A separate feature
  // state did not really seem to be working, so the content state is always entered.
  this.state = CONTENT_STATE;
}
 
Example #8
Source File: XCasToCasDataSaxHandler.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Stores one attribute as a feature value on {@code fsImpl}. A name starting with the
 * reference prefix is stored as a reference value under the name with the prefix removed;
 * any other name is stored as a primitive value.
 *
 * @param fsImpl
 *          the feature structure receiving the value
 * @param featName
 *          the attribute name, possibly carrying the reference prefix
 * @param featVal
 *          the attribute value
 * @throws SAXParseException
 *           declared for callers; not thrown by the visible code
 */
private void handleFeature(FeatureStructureImpl fsImpl, String featName, String featVal)
        throws SAXParseException {
  final boolean isReference = featName.startsWith(XCASSerializer.REF_PREFIX);
  if (!isReference) {
    fsImpl.setFeatureValue(featName, new PrimitiveValueImpl(featVal));
    return;
  }

  final int prefixLength = XCASSerializer.REF_PREFIX.length();
  final String unprefixedName = featName.substring(prefixLength);
  fsImpl.setFeatureValue(unprefixedName, new ReferenceValueImpl(featVal));
}
 
Example #9
Source File: XCasWriterCasConsumer.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given CAS to a file as XCAS XML. The output stream is closed when the method
 * returns, whether or not serialization succeeded.
 *
 * @param aCas
 *          CAS to serialize
 * @param name
 *          output file
 * @throws IOException
 *           if an I/O failure occurs
 * @throws SAXException
 *           if an error occurs generating the XML text
 */
private void writeXCas(CAS aCas, File name) throws IOException, SAXException {
  try (OutputStream xcasStream = new FileOutputStream(name)) {
    XCASSerializer casSerializer = new XCASSerializer(aCas.getTypeSystem());
    // The XML serializer turns the SAX events emitted by the CAS serializer into text.
    casSerializer.serialize(aCas, new XMLSerializer(xcasStream, false).getContentHandler());
  }
}
 
Example #10
Source File: XCasToCasDataSaxHandler.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the start tag of an array feature structure. The id, size and indexed attributes are
 * parsed, an array feature structure of the matching kind is created as the current feature
 * structure, and the parser moves to the state that expects array elements.
 *
 * @param type
 *          the type name of the array
 * @param attrs
 *          the attributes of the array element
 * @throws SAXParseException
 *           if the size is not a non-negative integer or an unexpected attribute is present
 */
private void readArray(String type, Attributes attrs) throws SAXParseException {
  String fsId = null;
  int length = 0;
  int[] indexedIn = Constants.EMPTY_INT_ARRAY;

  for (int idx = 0; idx < attrs.getLength(); idx++) {
    final String name = attrs.getQName(idx);
    final String value = attrs.getValue(idx);

    if (name.equals(XCASSerializer.ID_ATTR_NAME)) {
      fsId = value;
      continue;
    }

    if (name.equals(XCASSerializer.ARRAY_SIZE_ATTR)) {
      try {
        length = Integer.parseInt(value);
        if (length < 0) {
          throw createException(XCASParsingException.ILLEGAL_ARRAY_SIZE, value);
        }
      } catch (NumberFormatException e) {
        throw createException(XCASParsingException.INTEGER_EXPECTED, value);
      }
      continue;
    }

    if (name.equals(XCASSerializer.INDEXED_ATTR_NAME)) {
      if (value.equals(XCASSerializer.TRUE_VALUE)) {
        // Backwards compatible CAS, has one default text Sofa
        indexedIn = new int[] { 1 };
      } else if (!value.equals("false")) {
        indexedIn = parseIntArray(value);
      }
      continue;
    }

    // Arrays accept no attributes other than the three handled above.
    throw createException(XCASParsingException.ILLEGAL_ARRAY_ATTR, name);
  }

  // Remember position and element kind for the array values that follow.
  this.arrayPos = 0;
  if (CAS.TYPE_NAME_INTEGER_ARRAY.equals(type)) {
    this.currentFS = new PrimitiveArrayFSImpl(new int[length]);
    this.arrayType = INT_TYPE;
  } else if (CAS.TYPE_NAME_FLOAT_ARRAY.equals(type)) {
    this.currentFS = new PrimitiveArrayFSImpl(new float[length]);
    this.arrayType = FLOAT_TYPE;
  } else if (CAS.TYPE_NAME_STRING_ARRAY.equals(type)) {
    this.currentFS = new PrimitiveArrayFSImpl(new String[length]);
    this.arrayType = STRING_TYPE;
  } else {
    // Every other type is handled as an array of references.
    this.currentFS = new ReferenceArrayFSImpl(new String[length]);
    this.arrayType = FS_TYPE;
  }

  this.currentFS.setId(fsId);
  this.currentFS.setType(type);
  this.currentFS.setIndexed(indexedIn);
  this.currentFS.setFeatureValue(XCASSerializer.ARRAY_SIZE_ATTR,
          new PrimitiveValueImpl(length));
  this.state = ARRAY_ELE_STATE;
}
 
Example #11
Source File: XCasToCasDataSaxHandlerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips a CAS through the CasData representation: the CAS is serialized as XCAS SAX
 * events into a CasData, the CasData is written back out as XCAS XML, and that XML is
 * deserialized into a second CAS which is then compared with the original.
 *
 * @param aCAS
 *          the CAS to convert
 * @throws IOException
 *           if reading the generated XML fails
 * @throws ParserConfigurationException
 *           if no SAX parser can be created
 * @throws SAXException
 *           if generating or parsing the XML fails
 * @throws ResourceInitializationException
 *           if the second CAS cannot be created
 * @throws CASRuntimeException
 *           if a CAS operation fails
 */
private void _testConversions(CAS aCAS) throws IOException,
        ParserConfigurationException, SAXException, ResourceInitializationException,
        CASRuntimeException {
  // Step 1: feed the XCAS events of the CAS into an XCasToCasDataSaxHandler.
  CasData convertedData = new CasDataImpl();
  XCasToCasDataSaxHandler toCasData = new XCasToCasDataSaxHandler(convertedData);
  new XCASSerializer(aCAS.getTypeSystem()).serialize(aCAS, toCasData);

  Assert.assertNotNull(convertedData);
  assertValidCasData(convertedData, aCAS.getTypeSystem());

  // Step 2: turn the CasData back into XCAS text.
  CasDataToXCas xcasGenerator = new CasDataToXCas();
  StringWriter xmlWriter = new StringWriter();
  XMLSerializer textSerializer = new XMLSerializer(xmlWriter, false);
  xcasGenerator.setContentHandler(textSerializer.getContentHandler());
  xcasGenerator.generateXCas(convertedData);

  String xml = xmlWriter.toString();
  // workaround for XML serialization problem on Sun Java 1.4
  if (!builtInXmlSerializationSupportsCRs()) {
    xml = xml.replaceAll("&#10;", "&#13;&#10;");
  }
  UIMAFramework.getLogger(XCasToCasDataSaxHandlerTest.class).log(Level.FINE, xml);

  // Step 3: deserialize the text into a second CAS built on the same type system.
  CAS roundTripped = CasCreationUtils.createCas(null, aCAS.getTypeSystem(), null);
  XCASDeserializer xcasDeserializer = new XCASDeserializer(roundTripped.getTypeSystem());
  ContentHandler intoSecondCas = xcasDeserializer.getXCASHandler(roundTripped);

  XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
  reader.setContentHandler(intoSecondCas);
  reader.parse(new InputSource(new StringReader(xml)));

  // CASes should be identical
  CasComparer.assertEquals(aCAS, roundTripped);
}
 
Example #12
Source File: XCasWriterCasConsumer.java    From uima-uimaj with Apache License 2.0 3 votes vote down vote up
/**
 * Serializes a CAS into the given file using the XCAS format. The file stream is opened in a
 * try-with-resources statement, so it is closed on every exit path.
 *
 * @param aCas
 *          CAS to serialize
 * @param name
 *          output file
 * @throws IOException
 *           if an I/O failure occurs
 * @throws SAXException
 *           if an error occurs generating the XML text
 */
private void writeXCas(CAS aCas, File name) throws IOException, SAXException {
  try (OutputStream fileStream = new FileOutputStream(name)) {
    XCASSerializer xcasWriter = new XCASSerializer(aCas.getTypeSystem());
    // Converts the SAX events produced by the XCAS serializer into XML text on the stream.
    XMLSerializer saxToText = new XMLSerializer(fileStream, false);
    xcasWriter.serialize(aCas, saxToText.getContentHandler());
  }
}
 
Example #13
Source File: CASTransportable.java    From uima-uimaj with Apache License 2.0 2 votes vote down vote up
/**
 * Creates an XTalk serializer that keeps the given output stream and XCAS serializer for
 * later use.
 *
 * @param os
 *          the output stream to store
 * @param s
 *          the XCAS serializer to store
 */
XTalkSerializer(OutputStream os, XCASSerializer s) {
  this.serializer = s;
  this.os = os;
}