org.apache.uima.util.XMLSerializer Java Examples

The following examples show how to use org.apache.uima.util.XMLSerializer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CTAKESClinicalPipelineFactory.java    From ctakes-clinical-pipeline with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the CAS behind the given JCas as XMI into {@code file}.
 *
 * @param jcas the JCas whose type system and CAS are serialized
 * @param file the file to write to
 * @throws SAXException if the XMI serializer fails while generating the XML
 * @throws IOException if the file cannot be opened, written or closed
 *         (a missing/unwritable target surfaces as {@link FileNotFoundException})
 */
private static void serialize(JCas jcas, File file) throws SAXException, IOException {
  // try-with-resources closes the stream on every path and, unlike a manual
  // finally block, cannot hit a NullPointerException when opening the file fails.
  // Exceptions propagate unchanged so their cause and stack trace are preserved.
  try (OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(file))) {
    XmiCasSerializer xmiSerializer = new XmiCasSerializer(jcas.getTypeSystem());
    XMLSerializer xmlSerializer = new XMLSerializer(outputStream, true);
    xmiSerializer.serialize(jcas.getCas(), xmlSerializer.getContentHandler());
  }
}
 
Example #2
Source File: CpeBuilder.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a temporary file containing an XML descriptor of the given resource. Returns the file.
 * The file is registered for deletion when the JVM exits.
 * 
 * @param resource
 *          A resource specifier that should be materialized.
 * @return The file containing the XML representation of the given resource.
 * @throws IOException
 *           if the temporary file cannot be created or written.
 * @throws SAXException
 *           if generating the XML for the resource fails.
 */
private static File materializeDescriptor(ResourceSpecifier resource) throws IOException,
        SAXException {
  File tempDesc = File.createTempFile("desc", ".xml");
  tempDesc.deleteOnExit();

  // Write the descriptor using XML 1.1 to allow a wider range of characters for parameter values
  try (OutputStream os = Files.newOutputStream(tempDesc.toPath())) {
    XMLSerializer sax2xml = new XMLSerializer(os, true);
    sax2xml.setOutputProperty(OutputKeys.VERSION, "1.1");
    // Fetch the handler once (after the version is set) and use that same instance for the
    // document start, the body and the document end.
    ContentHandler contentHandler = sax2xml.getContentHandler();
    contentHandler.startDocument();
    resource.toXML(contentHandler, true);
    contentHandler.endDocument();
  }

  return tempDesc;
}
 
Example #3
Source File: CreateSampleXCASFile.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a small tokenized sample document and writes it as an XCAS file to
 * {@code src/test/resources/data/docs/test.xcas}.
 *
 * @param args unused
 * @throws UIMAException if the JCas cannot be created
 * @throws SAXException if XCAS serialization fails
 * @throws IOException if the output file cannot be written
 */
public static void main(String[] args) throws UIMAException, SAXException, IOException {
  TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class,
          Sentence.class, "pos", "stem");
  JCas jCas = JCasFactory.createJCas();
  // quote from http://www.gutenberg.org/files/20417/20417-h/20417-h.htm
  String text = "... the more knowledge advances the more it becomes possible to condense it into little books.";
  tokenBuilder
          .buildTokens(
                  jCas,
                  text,
                  "... the more knowledge advances the more it becomes possible to condense it into little books . ",
                  ". T M K A T M I B P T C I I L B .",
                  "... the more knowledge advance the more it become possible to condense it into little book . ");

  // try-with-resources so the file handle is released even when serialization throws
  try (FileOutputStream out = new FileOutputStream("src/test/resources/data/docs/test.xcas")) {
    XCASSerializer ser = new XCASSerializer(jCas.getTypeSystem());
    XMLSerializer xmlSer = new XMLSerializer(out, false);
    ser.serialize(jCas.getCas(), xmlSer.getContentHandler());
  }
}
 
Example #4
Source File: JsonCasSerializerTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the test CAS either as JSON (when {@code doJson} is set) or as XMI,
 * and returns the result as a string.
 *
 * <p>On failure, whatever output was produced so far is printed to {@code System.err}
 * and the original exception is rethrown.
 *
 * @return the serialized CAS
 * @throws Exception any exception raised by the serializers
 */
private String serialize() throws Exception {
  StringWriter sw = null;
  ByteArrayOutputStream baos = null;
  try {
    if (doJson) {
      sw = new StringWriter();
      jcs.serialize(cas, sw);
      return sw.toString();
    } else {
      XmiCasSerializer xcs = new XmiCasSerializer(jcs.getCss().getFilterTypes());
      baos = new ByteArrayOutputStream();

      XMLSerializer sax2xml = new XMLSerializer(baos, jcs.getCss().isFormattedOutput);
      xcs.serialize(cas, sax2xml.getContentHandler(), null);
      return baos.toString("UTF-8");
    }
  } catch (Exception e) {
    // Either buffer may still be null (e.g. the XMI serializer could not even be
    // constructed); guard against that so a NullPointerException here does not
    // mask the exception we actually want to report.
    String producedSoFar;
    if (sw != null) {
      producedSoFar = sw.toString();
    } else if (baos != null) {
      producedSoFar = baos.toString("UTF-8");
    } else {
      producedSoFar = "";
    }
    System.err.format("Exception occurred. The string produced so far was: %n%s%n",
        producedSoFar);
    throw e;
  }
}
 
Example #5
Source File: NewPrimitiveTypesTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips the example feature structures through an XCAS file: serialize,
 * reset the CAS, deserialize from the same file, then validate the values.
 *
 * @throws Exception if serialization, deserialization or file access fails
 */
public void testXCASSerialization() throws Exception {

  // create FS
  createExampleFS(cas);

  // serialize; the stream is closed before the file is read back
  XCASSerializer ser = new XCASSerializer(cas.getTypeSystem());
  try (OutputStream outputXCAS = new FileOutputStream(JUnitExtension
          .getFile("ExampleCas/newprimitives.xcas"))) {
    XMLSerializer xmlSer = new XMLSerializer(outputXCAS);
    ser.serialize(cas, xmlSer.getContentHandler());
  }

  // reset
  cas.reset();

  // deserialize; close the input stream even if deserialization fails
  try (InputStream inputXCAS = new FileInputStream(JUnitExtension
          .getFile("ExampleCas/newprimitives.xcas"))) {
    XCASDeserializer.deserialize(inputXCAS, cas, false);
  }

  // check values
  validateFSData(cas);
}
 
Example #6
Source File: XCASDeserializerTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Parses an XCAS that uses the implicit value feature (element text content) into a CAS
 * created from an empty type system description, collecting out-of-type-system data,
 * then re-serializes and checks that the value text is still present.
 *
 * @throws Exception if CAS creation, parsing or serialization fails
 */
public void testOutOfTypeSystem3() throws Exception {
  CAS targetCas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
          new TypePriorities_impl(), new FsIndexDescription[0]);
  String inputXcas = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><CAS>"
          + "<uima.tcas.Document _content=\"text\">Test Document</uima.tcas.Document>"
          + "<uima.tcas.DocumentAnnotation _indexed=\"1\" _id=\"8\" sofa=\"1\" begin=\"0\" end=\"13\" language=\"en\"/>"
          + "<foo.Bar _indexed=\"1\" _id=\"2\" sofa=\"1\" begin=\"0\" end=\"0\" baz=\"blah\">this is the value feature</foo.Bar></CAS>";

  // Deserialize, routing anything the type system does not know into outOfTsData.
  OutOfTypeSystemData outOfTsData = new OutOfTypeSystemData();
  XMLReader reader = XMLUtils.createXMLReader();
  reader.setContentHandler(
          new XCASDeserializer(targetCas.getTypeSystem()).getXCASHandler(targetCas, outOfTsData));
  reader.parse(new InputSource(new StringReader(inputXcas)));

  // Serialize again, this time handing the collected out-of-type-system data back in.
  XCASSerializer serializer = new XCASSerializer(targetCas.getTypeSystem());
  StringWriter buffer = new StringWriter();
  XMLSerializer xmlOut = new XMLSerializer(buffer, false);
  serializer.serialize(targetCas, xmlOut.getContentHandler(), true, outOfTsData);
  String roundTripped = buffer.toString();

  // The value feature must survive; on output it appears as an attribute.
  assertTrue(roundTripped.contains("value=\"this is the value feature\""));
}
 
Example #7
Source File: XCasWriterCasConsumer.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given CAS to a file using the XCAS serializer.
 *
 * @param aCas the CAS to write
 * @param name the file that receives the XCAS output
 * @throws IOException if opening, writing or closing the file fails
 * @throws SAXException if the XML text cannot be generated
 */
private void writeXCas(CAS aCas, File name) throws IOException, SAXException {
  try (OutputStream fileStream = new FileOutputStream(name)) {
    XCASSerializer xcasSerializer = new XCASSerializer(aCas.getTypeSystem());
    XMLSerializer xmlSink = new XMLSerializer(fileStream, false);
    // The XCAS serializer emits SAX events; the XMLSerializer's handler turns them into text.
    xcasSerializer.serialize(aCas, xmlSink.getContentHandler());
  }
}
 
Example #8
Source File: ExternalRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes the given CAS to an XMI string.
 *
 * @param aCas the CAS to serialize (resolved via {@code getRealCas} first)
 * @return the XMI text
 * @throws RecommendationException if serialization fails for any reason
 */
private String serializeCas(CAS aCas) throws RecommendationException
{
    try (StringWriter xmiText = new StringWriter()) {
        // A null type system tells the XmiCasSerializer not to filter: every type
        // is serialized rather than only those of a specific target type system.
        XmiCasSerializer serializer = new XmiCasSerializer(null);
        XMLSerializer xmlSink = new XMLSerializer(xmiText, true);
        serializer.serialize(getRealCas(aCas), xmlSink.getContentHandler(), null, null, null);
        return xmiText.toString();
    }
    catch (CASRuntimeException | SAXException | IOException e) {
        throw new RecommendationException("Error while serializing CAS!", e);
    }
}
 
Example #9
Source File: NewPrimitiveTypesTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips the example feature structures through an in-memory XMI string:
 * serialize, reset the CAS, parse the string back in, then validate the values.
 *
 * @throws Exception if serialization or parsing fails
 */
public void testXmiSerialization() throws Exception {
  // populate the CAS
  createExampleFS(cas);

  // serialize to a string
  StringWriter xmiBuffer = new StringWriter();
  XMLSerializer xmlSink = new XMLSerializer(xmiBuffer, false);
  XmiCasSerializer xmiSerializer = new XmiCasSerializer(cas.getTypeSystem());
  xmiSerializer.serialize(cas, xmlSink.getContentHandler());
  String xmiText = xmiBuffer.toString();

  // wipe the CAS so the values can only come from the serialized form
  cas.reset();

  // parse the string back into the same CAS
  XmiCasDeserializer xmiDeserializer = new XmiCasDeserializer(cas.getTypeSystem());
  ContentHandler xmiHandler = xmiDeserializer.getXmiCasHandler(cas);
  XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
  reader.setContentHandler(xmiHandler);
  reader.parse(new InputSource(new StringReader(xmiText)));

  // verify
  validateFSData(cas);
}
 
Example #10
Source File: CreateSampleXMIFile.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a small tokenized sample document and writes it as an XMI file to
 * {@code src/test/resources/data/docs/test.xmi}.
 *
 * @param args unused
 * @throws UIMAException if the JCas cannot be created
 * @throws SAXException if XMI serialization fails
 * @throws IOException if the output file cannot be written
 */
public static void main(String[] args) throws UIMAException, SAXException, IOException {
  TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class,
          Sentence.class, "pos", "stem");
  JCas jCas = JCasFactory.createJCas();
  String text = "Me and all my friends are non-conformists.";
  tokenBuilder.buildTokens(jCas, text, "Me and all my friends are non - conformists .",
          "M A A M F A N - C .", "me and all my friend are non - conformist .");

  // try-with-resources so the file handle is released even when serialization throws
  try (FileOutputStream out = new FileOutputStream("src/test/resources/data/docs/test.xmi")) {
    XmiCasSerializer ser = new XmiCasSerializer(jCas.getTypeSystem());
    XMLSerializer xmlSer = new XMLSerializer(out, false);
    ser.serialize(jCas.getCas(), xmlSer.getContentHandler());
  }
}
 
Example #11
Source File: DATACasUtils.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Generates the XCAS representation of the given CAS data and returns it as a string.
 *
 * @param aCasData the CAS data to convert
 * @param keysToFilter the type keys handed to the generator as types to filter
 * @return the XCAS document as a string
 * @throws Exception if XCAS generation fails
 */
public static String getXCASasString(CasData aCasData, String[] keysToFilter) throws Exception {
  CasDataToXCas generator = new CasDataToXCas();
  generator.setTypesToFilter(keysToFilter);

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  XMLSerializer sax2xml = new XMLSerializer(baos);
  generator.setContentHandler(sax2xml.getContentHandler());

  generator.generateXCas(aCasData);

  // Decode explicitly as UTF-8 instead of the platform default charset, which would
  // corrupt non-ASCII text on systems whose default is not UTF-8.
  // NOTE(review): assumes the XMLSerializer emits UTF-8 by default - confirm.
  return baos.toString("UTF-8");
}
 
Example #12
Source File: XmiCasDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a CAS with two views that share one indexed TOP instance, serializes it to XMI,
 * and deserializes it twice into a second CAS (with a reset in between), checking the
 * document texts and the indexed feature structures each time.
 *
 * @throws Exception declared for the test signature; failures inside the body are routed
 *         through {@code JUnitExtension.handleException}
 */
public void testMultipleSofas() throws Exception {
  try {
    CAS sourceCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
            new FsIndexDescription[0]);
    // text of the initial view
    sourceCas.setDocumentText("This is a test");
    // second view with its own text
    CAS otherView = sourceCas.createView("OtherSofa");
    otherView.setDocumentText("This is only a test");

    // An instance of TOP is used because an annotation cannot be added to a view other
    // than the one it was created in. https://issues.apache.org/jira/browse/UIMA-4099
    // The same TOP instance is indexed in both views.
    Type topType = sourceCas.getTypeSystem().getTopType();
    FeatureStructure sharedFs = sourceCas.createFS(topType);
    sourceCas.getIndexRepository().addFS(sharedFs);
    otherView.getIndexRepository().addFS(sharedFs);

    // Each view must yield exactly two indexed FSs.
    FSIterator<FeatureStructure> initialIt = sourceCas.getIndexRepository().getAllIndexedFS(topType);
    FSIterator<FeatureStructure> otherIt = otherView.getIndexRepository().getAllIndexedFS(topType);
    initialIt.next();
    initialIt.next();
    otherIt.next();
    otherIt.next();
    assertFalse(initialIt.hasNext());
    assertFalse(otherIt.hasNext());

    // serialize to an XMI string
    StringWriter xmiBuffer = new StringWriter();
    XMLSerializer xmlSink = new XMLSerializer(xmiBuffer, false);
    XmiCasSerializer xmiSerializer = new XmiCasSerializer(sourceCas.getTypeSystem());
    xmiSerializer.serialize(sourceCas, xmlSink.getContentHandler());
    String xmiText = xmiBuffer.toString();

    // deserialize into another CAS; two passes verify it still works after reset()
    CAS targetCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
            new FsIndexDescription[0]);
    for (int pass = 0; pass < 2; pass++) {
      XmiCasDeserializer deserializer = new XmiCasDeserializer(targetCas.getTypeSystem());
      ContentHandler xmiHandler = deserializer.getXmiCasHandler(targetCas);
      XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
      reader.setContentHandler(xmiHandler);
      reader.parse(new InputSource(new StringReader(xmiText)));

      // both sofas carry their original text
      assertEquals("This is a test", targetCas.getDocumentText());
      CAS targetOtherView = targetCas.getView("OtherSofa");
      assertEquals("This is only a test", targetOtherView.getDocumentText());

      // the FSs are still indexed in both views
      FSIterator<FeatureStructure> targetIt = targetCas.getIndexRepository().getAllIndexedFS(topType);
      FSIterator<FeatureStructure> targetOtherIt =
              targetOtherView.getIndexRepository().getAllIndexedFS(topType);
      targetIt.next();
      targetIt.next();
      targetOtherIt.next();
      targetOtherIt.next();
      assertFalse(targetIt.hasNext());
      // NOTE: the matching hasNext() check on the second view's iterator is disabled here,
      // as are the older index-size checks (tIndex / t2Index expected size 2).

      targetCas.reset();
    }
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
 
Example #13
Source File: DotCorpusSerializer.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a <code>DotCorpus</code> as XML to the given <code>OutputStream</code>.
 * 
 * <p>The output is a single config element containing one empty child element per corpus
 * folder, annotation style, shown type and CAS processor folder, plus optional type system
 * and editor line length elements. All values are carried as attributes.
 * 
 * @param dotCorpus
 *          the {@link DotCorpus} object to serialize.
 * @param out
 *          the stream the XML is written to.
 * @throws CoreException
 *           wrapping any {@link SAXException} raised while emitting the XML.
 */
public static void serialize(DotCorpus dotCorpus, OutputStream out) throws CoreException {

  XMLSerializer serializer = new XMLSerializer(out, true);
  ContentHandler handler = serializer.getContentHandler();

  try {
    handler.startDocument();
    handler.startElement("", CONFIG_ELEMENT, CONFIG_ELEMENT, new AttributesImpl());

    // corpus folders
    for (String corpusFolder : dotCorpus.getCorpusFolderNameList()) {
      AttributesImpl attrs = new AttributesImpl();
      attrs.addAttribute("", "", CORPUS_FOLDER_ATTRIBUTE, "", corpusFolder);
      writeEmptyDotCorpusElement(handler, CORPUS_ELEMENT, attrs);
    }

    // annotation styles
    for (AnnotationStyle style : dotCorpus.getAnnotationStyles()) {
      AttributesImpl attrs = new AttributesImpl();
      attrs.addAttribute("", "", STYLE_TYPE_ATTRIBUTE, "", style.getAnnotation());
      attrs.addAttribute("", "", STYLE_STYLE_ATTRIBUTE, "", style.getStyle().name());

      // Rebuild the color from its red/green/blue components before taking the packed value.
      Color styleColor = style.getColor();
      Color rgbOnly = new Color(styleColor.getRed(), styleColor.getGreen(), styleColor.getBlue());
      attrs.addAttribute("", "", STYLE_COLOR_ATTRIBUTE, "", Integer.toString(rgbOnly.getRGB()));
      attrs.addAttribute("", "", STYLE_LAYER_ATTRIBUTE, "", Integer.toString(style.getLayer()));
      if (style.getConfiguration() != null) {
        attrs.addAttribute("", "", STYLE_CONFIG_ATTRIBUTE, "", style.getConfiguration());
      }
      writeEmptyDotCorpusElement(handler, STYLE_ELEMENT, attrs);
    }

    // shown types; every listed type is written as visible
    for (String type : dotCorpus.getShownTypes()) {
      AttributesImpl attrs = new AttributesImpl();
      attrs.addAttribute("", "", SHOWN_TYPE_ATTRIBUTE, "", type);
      attrs.addAttribute("", "", SHOWN_IS_VISISBLE_ATTRIBUTE, "", "true");
      writeEmptyDotCorpusElement(handler, SHOWN_ELEMENT, attrs);
    }

    // type system file, only when one is set
    if (dotCorpus.getTypeSystemFileName() != null) {
      AttributesImpl attrs = new AttributesImpl();
      attrs.addAttribute("", "", TYPESYTEM_FILE_ATTRIBUTE, "", dotCorpus.getTypeSystemFileName());
      writeEmptyDotCorpusElement(handler, TYPESYSTEM_ELEMENT, attrs);
    }

    // CAS processor folders
    for (String folder : dotCorpus.getCasProcessorFolderNames()) {
      AttributesImpl attrs = new AttributesImpl();
      attrs.addAttribute("", "", CAS_PROCESSOR_FOLDER_ATTRIBUTE, "", folder);
      writeEmptyDotCorpusElement(handler, CAS_PROCESSOR_ELEMENT, attrs);
    }

    // editor line length hint, only when it differs from the default
    if (dotCorpus.getEditorLineLengthHint() != DotCorpus.EDITOR_LINE_LENGTH_HINT_DEFAULT) {
      AttributesImpl attrs = new AttributesImpl();
      attrs.addAttribute("", "", EDITOR_LINE_LENGTH_ATTRIBUTE, "",
              Integer.toString(dotCorpus.getEditorLineLengthHint()));
      writeEmptyDotCorpusElement(handler, EDITOR_ELEMENT, attrs);
    }

    handler.endElement("", CONFIG_ELEMENT, CONFIG_ELEMENT);
    handler.endDocument();
  } catch (SAXException e) {
    String message = "";
    if (e.getMessage() != null) {
      message = e.getMessage();
    }

    IStatus status = new Status(IStatus.ERROR, CasEditorPlugin.ID, IStatus.OK, message, e);
    throw new CoreException(status);
  }
}

/**
 * Emits an element with the given attributes and no content (start immediately followed by end).
 * The element name is used both as local name and as qualified name, with an empty namespace.
 */
private static void writeEmptyDotCorpusElement(ContentHandler handler, String elementName,
        AttributesImpl attributes) throws SAXException {
  handler.startElement("", elementName, elementName, attributes);
  handler.endElement("", elementName, elementName);
}
 
Example #14
Source File: AnalysisEngine_implTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that an aggregate descriptor can be copied while preserving all comments and the
 * ordering of its delegates: parse it, write it back out, compare the two files, and verify
 * the delegate initialization order for both the original and the copy.
 *
 * @throws Exception if parsing, writing, comparison or engine production fails
 */
private void manyDelegatesCommon() throws Exception {
  XMLParser.ParsingOptions options = new XMLParser.ParsingOptions(false);
  options.preserveComments = true;
  XMLParser xmlParser = UIMAFramework.getXMLParser();
  File original = JUnitExtension.getFile("TextAnalysisEngineImplTest/AggregateWithManyDelegates.xml");
  AnalysisEngineDescription description =
          xmlParser.parseAnalysisEngineDescription(new XMLInputSource(original), options);

  // Write the parsed descriptor next to the original.
  File copy = new File(original.getParentFile(), "CopyOfAggregateWithManyDelegates.xml");
  try (BufferedOutputStream sink = new BufferedOutputStream(new FileOutputStream(copy))) {
    XMLSerializer serializer = new XMLSerializer(false);
    serializer.setOutputStream(sink);
    // Use an indent amount that would be noticeable if it were applied. It should not be,
    // because the parser mode in use preserves comments and ignorable white space.
    // NOTE: Saxon appears to force the indent to be 3 - which is what the input file now uses.
    serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
    ContentHandler handler = serializer.getContentHandler();
    handler.startDocument();
    description.toXML(handler, true);
    handler.endDocument();
  }

  String originalXml = FileCompare.file2String(original);
  String copyXml = FileCompare.file2String(copy);
  XMLAssert.assertXMLEqual(originalXml, copyXml);
  // When building from a source distribution the descriptor may not have
  // appropriate line-ends so compute the length as if always 1 byte.
  int diff = fileLength(copy) - fileLength(original);
  // One platform inserts a blank line and a final newline, so don't insist on perfection
  // NOTE:  This fails with Saxon as it omits the xmlns attribute (why?) and omits the newlines between adjacent comments.
  // It also produces many differences in indentation if the input is not indented by 3
  assertTrue("File size changed by "+diff+" should be no more than 2", !(diff < -2 || diff > 2));

  // Delegates must be initialized in declaration order.
  TestAnnotator2.allContexts = "";
  UIMAFramework.produceAnalysisEngine(description);
  assertEquals("D/C/B/A/F/E/", TestAnnotator2.allContexts);

  // The copy must yield the same delegate order when parsed and instantiated.
  description = xmlParser.parseAnalysisEngineDescription(new XMLInputSource(copy), options);
  TestAnnotator2.allContexts = "";
  UIMAFramework.produceAnalysisEngine(description);
  assertEquals("D/C/B/A/F/E/", TestAnnotator2.allContexts);
  copy.delete();
}
 
Example #15
Source File: XCasToCasDataSaxHandlerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a CAS to CasData via XCAS SAX events, regenerates XCAS text from that CasData,
 * deserializes the text into a fresh CAS, and asserts the result equals the input CAS.
 *
 * @param aCAS the CAS to round-trip
 */
private void _testConversions(CAS aCAS) throws IOException,
        ParserConfigurationException, SAXException, ResourceInitializationException,
        CASRuntimeException {
  // CAS -> XCAS SAX events -> CasData
  CasData convertedData = new CasDataImpl();
  XCasToCasDataSaxHandler toCasDataHandler = new XCasToCasDataSaxHandler(convertedData);
  XCASSerializer xcasSerializer = new XCASSerializer(aCAS.getTypeSystem());
  xcasSerializer.serialize(aCAS, toCasDataHandler);

  Assert.assertNotNull(convertedData);
  assertValidCasData(convertedData, aCAS.getTypeSystem());

  // CasData -> XCAS text
  CasDataToXCas xcasGenerator = new CasDataToXCas();

  StringWriter xcasBuffer = new StringWriter();
  XMLSerializer xmlSink = new XMLSerializer(xcasBuffer, false);
  xcasGenerator.setContentHandler(xmlSink.getContentHandler());

  xcasGenerator.generateXCas(convertedData);
  String xcasText = xcasBuffer.toString();

  // Workaround for an XML serialization problem on Sun Java 1.4: restore the CR that
  // precedes each LF character reference when the built-in serializer drops CRs.
  if (!builtInXmlSerializationSupportsCRs()) {
    xcasText = xcasText.replace("&#10;", "&#13;&#10;");
  }

  UIMAFramework.getLogger(XCasToCasDataSaxHandlerTest.class).log(Level.FINE, xcasText);

  // XCAS text -> second CAS sharing the input's type system
  CAS roundTripCas = CasCreationUtils.createCas(null, aCAS.getTypeSystem(), null);
  XCASDeserializer xcasDeserializer = new XCASDeserializer(roundTripCas.getTypeSystem());
  ContentHandler xcasHandler = xcasDeserializer.getXCASHandler(roundTripCas);

  XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
  reader.setContentHandler(xcasHandler);
  reader.parse(new InputSource(new StringReader(xcasText)));

  // both CASes must hold the same content
  CasComparer.assertEquals(aCAS, roundTripCas);
}
 
Example #16
Source File: XmiCasDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Loads a CAS from an XCAS file, re-serializes it as XMI while filtering by a partial type
 * system, deserializes that XMI into a CAS with the full type system, and checks which
 * types and features survived the filtering.
 *
 * @throws Exception declared for the test signature; failures inside the body are routed
 *         through {@code JUnitExtension.handleException}
 */
public void testTypeSystemFiltering() throws Exception {
  try {
    // deserialize a complex CAS from XCAS
    CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);

    XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
    ContentHandler deserHandler = deser.getXCASHandler(cas);
    SAXParserFactory fact = SAXParserFactory.newInstance();
    SAXParser parser = fact.newSAXParser();
    XMLReader xmlReader = parser.getXMLReader();
    xmlReader.setContentHandler(deserHandler);
    // try-with-resources so the file handle is released even when parsing fails
    try (InputStream serCasStream = new FileInputStream(
            JUnitExtension.getFile("ExampleCas/cas.xml"))) {
      xmlReader.parse(new InputSource(serCasStream));
    }

    // now read in a TypeSystem that's a subset of those types
    TypeSystemDescription partialTypeSystemDesc = UIMAFramework.getXMLParser()
            .parseTypeSystemDescription(
                    new XMLInputSource(JUnitExtension
                            .getFile("ExampleCas/partialTestTypeSystem.xml")));
    TypeSystem partialTypeSystem = CasCreationUtils.createCas(partialTypeSystemDesc, null, null)
            .getTypeSystem();

    // reserialize as XMI, filtering out anything that doesn't fit in the
    // partialTypeSystem
    StringWriter sw = new StringWriter();
    XMLSerializer xmlSer = new XMLSerializer(sw, false);
    XmiCasSerializer xmiSer = new XmiCasSerializer(partialTypeSystem);
    xmiSer.serialize(cas, xmlSer.getContentHandler());
    String xml = sw.getBuffer().toString();
    // System.out.println(xml);

    // deserialize into another CAS (which has the whole type system)
    CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
    XmiCasDeserializer deser2 = new XmiCasDeserializer(cas2.getTypeSystem());
    ContentHandler deserHandler2 = deser2.getXmiCasHandler(cas2);
    xmlReader.setContentHandler(deserHandler2);
    xmlReader.parse(new InputSource(new StringReader(xml)));

    // check that types have been filtered out
    Type orgType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Organization");
    assertNotNull(orgType);
    assertTrue(cas2.getAnnotationIndex(orgType).size() == 0);
    assertTrue(cas.getAnnotationIndex(orgType).size() > 0);

    // but that some types are still there
    Type personType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
    FSIndex personIndex = cas2.getAnnotationIndex(personType);
    assertTrue(personIndex.size() > 0);

    // check that mentionType has been filtered out (set to null)
    FeatureStructure somePlace = personIndex.iterator().get();
    Feature mentionTypeFeat = personType.getFeatureByBaseName("mentionType");
    assertNotNull(mentionTypeFeat);
    assertNull(somePlace.getStringValue(mentionTypeFeat));
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
 
Example #17
Source File: XCASDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a CAS with two sofas (the initial one and "OtherSofa") that share one indexed TOP
 * instance, serializes it to an XCAS string, and deserializes that string twice into a
 * second CAS (with a reset in between), checking texts and indexed FSs each time.
 *
 * @throws Exception if CAS creation, serialization or parsing fails
 */
public void testMultipleSofas() throws Exception {
  CAS sourceCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  // text of the initial view
  sourceCas.setDocumentText("This is a test");
  // second view with its own text
  CAS otherView = sourceCas.createView("OtherSofa");
  otherView.setDocumentText("This is only a test");

  // An instance of TOP is used because an annotation cannot be added to a view other
  // than the one it was created in. https://issues.apache.org/jira/browse/UIMA-4099
  // The same TOP instance is indexed in both views.
  Type topType = sourceCas.getTypeSystem().getTopType();
  FeatureStructure sharedFs = sourceCas.createFS(topType);
  sourceCas.getIndexRepository().addFS(sharedFs);
  otherView.getIndexRepository().addFS(sharedFs);

  // Each view must yield exactly two indexed FSs.
  FSIterator<FeatureStructure> initialIt = sourceCas.getIndexRepository().getAllIndexedFS(topType);
  FSIterator<FeatureStructure> otherIt = otherView.getIndexRepository().getAllIndexedFS(topType);
  initialIt.next();
  initialIt.next();
  otherIt.next();
  otherIt.next();
  assertFalse(initialIt.hasNext());
  assertFalse(otherIt.hasNext());

  // serialize to an XCAS string
  StringWriter xcasBuffer = new StringWriter();
  XMLSerializer xmlSink = new XMLSerializer(xcasBuffer, false);
  XCASSerializer xcasSerializer = new XCASSerializer(sourceCas.getTypeSystem());
  xcasSerializer.serialize(sourceCas, xmlSink.getContentHandler(), true);
  String xcasText = xcasBuffer.toString();

  // deserialize into another CAS; two passes verify it still works after reset()
  CAS targetCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  for (int pass = 0; pass < 2; pass++) {
    XCASDeserializer deserializer = new XCASDeserializer(targetCas.getTypeSystem());
    ContentHandler xcasHandler = deserializer.getXCASHandler(targetCas);
    XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
    reader.setContentHandler(xcasHandler);
    reader.parse(new InputSource(new StringReader(xcasText)));

    // both sofas carry their original text
    assertEquals("This is a test", targetCas.getDocumentText());
    CAS targetOtherView = targetCas.getView("OtherSofa");
    assertEquals("This is only a test", targetOtherView.getDocumentText());

    // the FSs are still indexed in both views
    FSIterator<FeatureStructure> targetIt = targetCas.getIndexRepository().getAllIndexedFS(topType);
    FSIterator<FeatureStructure> targetOtherIt =
            targetOtherView.getIndexRepository().getAllIndexedFS(topType);
    targetIt.next();
    targetIt.next();
    targetOtherIt.next();
    targetOtherIt.next();
    assertFalse(targetIt.hasNext());
    assertFalse(targetOtherIt.hasNext());
    // (older, disabled checks compared the sizes of tIndex / t2Index against 2)

    // reset so the second pass exercises deserialization into a reused CAS
    targetCas.reset();
  }
}
 
Example #18
Source File: XCASDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Deserializes a complex XCAS file into a CAS created from an empty type system description
 * (collecting out-of-type-system data), re-serializes it together with that data, then
 * deserializes the result into a CAS with the full type system and checks that the
 * string-array references on the entities are intact.
 *
 * @throws Exception if CAS creation, parsing or serialization fails
 */
public void testOutOfTypeSystem2() throws Exception {
  // deserialize a complex CAS into one with no TypeSystem
  CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
          new TypePriorities_impl(), new FsIndexDescription[0]);
  OutOfTypeSystemData ootsd = new OutOfTypeSystemData();
  XCASDeserializer deser = new XCASDeserializer(cas.getTypeSystem());
  ContentHandler deserHandler = deser.getXCASHandler(cas, ootsd);
  SAXParserFactory fact = SAXParserFactory.newInstance();
  SAXParser parser = fact.newSAXParser();
  XMLReader xmlReader = parser.getXMLReader();
  xmlReader.setContentHandler(deserHandler);
  // try-with-resources so the file handle is released even when parsing fails
  try (InputStream serCasStream = new FileInputStream(
          JUnitExtension.getFile("ExampleCas/cas.xml"))) {
    xmlReader.parse(new InputSource(serCasStream));
  }

  // now reserialize including OutOfTypeSystem data
  XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem());
  StringWriter sw = new StringWriter();
  XMLSerializer xmlSer = new XMLSerializer(sw, false);
  xcasSer.serialize(cas, xmlSer.getContentHandler(), true, ootsd);
  String xml = sw.getBuffer().toString();

  // deserialize into a CAS that accepts the full typesystem
  CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
  XCASDeserializer deser2 = new XCASDeserializer(cas2.getTypeSystem());
  ContentHandler deserHandler2 = deser2.getXCASHandler(cas2);
  xmlReader = parser.getXMLReader();
  xmlReader.setContentHandler(deserHandler2);
  xmlReader.parse(new InputSource(new StringReader(xml)));

  // check that array refs are not null
  Type entityType = cas2.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
  Feature classesFeat = entityType.getFeatureByBaseName("classes");
  Iterator<FeatureStructure> iter = cas2.getIndexRepository().getIndex("testEntityIndex").iterator();
  // guard against a vacuous pass: the index must contain at least one entity
  assertTrue(iter.hasNext());
  while (iter.hasNext()) {
    FeatureStructure fs = iter.next();
    StringArrayFS arrayFS = (StringArrayFS) fs.getFeatureValue(classesFeat);
    assertNotNull(arrayFS);
    for (int i = 0; i < arrayFS.size(); i++) {
      assertNotNull(arrayFS.get(i));
    }
  }
}
 
Example #19
Source File: MetaDataObject_impl.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Writes this object's XML through the given serializer, bracketing the output with the SAX
 * start-of-document and end-of-document events.
 *
 * @param sax2xml
 *          serializer whose content handler receives the SAX events
 * @throws SAXException
 *           declared by the SAX calls made here
 * @throws IOException
 *           declared for the delegated XML generation
 */
private void toXML(XMLSerializer sax2xml) throws SAXException, IOException {
  final ContentHandler handler = sax2xml.getContentHandler();
  handler.startDocument();
  // the handler fetched above is reused; there is no reason to create a new one
  toXML(handler, true);
  handler.endDocument();
}
 
Example #20
Source File: XCasWriterCasConsumer.java    From uima-uimaj with Apache License 2.0 3 votes vote down vote up
/**
 * Writes the given CAS to a file as XCAS XML.
 * <p>
 * The output stream is opened in a try-with-resources statement, so it is closed whether or not
 * serialization succeeds.
 * 
 * @param aCas
 *          CAS to serialize
 * @param name
 *          output file
 * 
 * @throws IOException
 *           if an I/O failure occurs
 * @throws SAXException
 *           if an error occurs generating the XML text
 */
private void writeXCas(CAS aCas, File name) throws IOException, SAXException {
  try (OutputStream xcasStream = new FileOutputStream(name)) {
    XCASSerializer casSerializer = new XCASSerializer(aCas.getTypeSystem());
    XMLSerializer xmlSerializer = new XMLSerializer(xcasStream, false);
    casSerializer.serialize(aCas, xmlSerializer.getContentHandler());
  }
}
 
Example #21
Source File: XmiCasSerializer.java    From uima-uimaj with Apache License 2.0 3 votes vote down vote up
/**
 * Serializes a Delta CAS to an XMI stream. This is the overload that exposes the full set of
 * configuration options.
 * 
 * @param aCAS
 *          CAS to serialize.
 * @param aTargetTypeSystem
 *          type system to which the produced XMI will conform. Types or features that are not
 *          part of the target type system are not serialized. Pass null to serialize all types
 *          and features.
 * @param aStream
 *          output stream to which the XMI document is written
 * @param aPrettyPrint
 *          if true the XML output is formatted with newlines and indenting; if false it is left
 *          unformatted.
 * @param aSharedData
 *          an optional container for data that is shared between the {@link XmiCasSerializer}
 *          and the {@link XmiCasDeserializer}. See the JavaDocs for
 *          {@link XmiSerializationSharedData} for details.
 * @param aMarker
 *          an optional object that is used to filter and serialize a Delta CAS containing only
 *          those FSs and Views created after Marker was set and preexisting FSs and views that
 *          were modified. See the JavaDocs for {@link Marker} for details.
 * @param useXml_1_1
 *          if true, the output serializer is set with the OutputKeys.VERSION to "1.1".
 * @throws SAXException
 *           if a problem occurs during XMI serialization
 */
public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream, boolean aPrettyPrint,
        XmiSerializationSharedData aSharedData, Marker aMarker, boolean useXml_1_1)
        throws SAXException {
  final XmiCasSerializer casSerializer = new XmiCasSerializer(aTargetTypeSystem);
  final XMLSerializer xmlOut = new XMLSerializer(aStream, aPrettyPrint);
  // the XML version is only overridden on request
  if (useXml_1_1) {
    xmlOut.setOutputProperty(OutputKeys.VERSION, "1.1");
  }
  // the third argument of the instance-level serialize call is deliberately left null here
  casSerializer.serialize(aCAS, xmlOut.getContentHandler(), null, aSharedData, aMarker);
}
 
Example #22
Source File: XmiWriterCasConsumer.java    From uima-uimaj with Apache License 2.0 3 votes vote down vote up
/**
 * Writes the given CAS to a file in XMI format.
 * <p>
 * The file stream is managed by try-with-resources and is therefore closed on both normal and
 * exceptional exit.
 * 
 * @param aCas
 *          CAS to serialize
 * @param name
 *          output file
 * @param modelFileName
 *          not referenced by this method
 * @throws IOException
 *           if the output file cannot be opened, written or closed
 * @throws SAXException
 *           if the XMI serialization reports an error
 */
private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException {
  try (FileOutputStream xmiStream = new FileOutputStream(name)) {
    // write XMI
    XmiCasSerializer casSerializer = new XmiCasSerializer(aCas.getTypeSystem());
    XMLSerializer xmlSerializer = new XMLSerializer(xmiStream, false);
    casSerializer.serialize(aCas, xmlSerializer.getContentHandler());
  }
}
 
Example #23
Source File: XmiWriterCasConsumer.java    From uima-uimaj with Apache License 2.0 3 votes vote down vote up
/**
 * Serializes a CAS into the given file using the XMI format.
 * <p>
 * The target stream lives inside a try-with-resources statement, so it is closed regardless of
 * whether serialization completes.
 * 
 * @param aCas
 *          CAS to serialize
 * @param name
 *          output file
 * @param modelFileName
 *          not referenced by this method
 * @throws IOException
 *           if the output file cannot be opened, written or closed
 * @throws SAXException
 *           if the XMI serialization reports an error
 */
private void writeXmi(CAS aCas, File name, String modelFileName) throws IOException, SAXException {
  try (OutputStream target = new FileOutputStream(name)) {
    // write XMI
    XmiCasSerializer xmiSerializer = new XmiCasSerializer(aCas.getTypeSystem());
    XMLSerializer sax2xml = new XMLSerializer(target, false);
    xmiSerializer.serialize(aCas, sax2xml.getContentHandler());
  }
}
 
Example #24
Source File: XCASSerializer.java    From uima-uimaj with Apache License 2.0 3 votes vote down vote up
/**
 * Serializes a CAS to a stream as an XCAS XML document.
 * 
 * @param aCAS
 *          CAS to serialize.
 * @param aStream
 *          output stream to which to write the XCAS XML document
 * @param isFormattedOutput
 *          if true the XCAS will be serialized formatted
 * @param useXml_1_1
 *          if true, the output serializer is set with the OutputKeys.VERSION to "1.1".
 * @throws SAXException
 *           if a problem occurs during XCAS serialization
 * @throws IOException
 *           if an I/O failure occurs
 */
public static void serialize(CAS aCAS, OutputStream aStream, boolean isFormattedOutput, boolean useXml_1_1)
        throws SAXException, IOException {
  final XCASSerializer casSerializer = new XCASSerializer(aCAS.getTypeSystem());
  final XMLSerializer xmlOut = new XMLSerializer(aStream, isFormattedOutput);
  // the XML version is only overridden on request
  if (useXml_1_1) {
    xmlOut.setOutputProperty(OutputKeys.VERSION, "1.1");
  }
  casSerializer.serialize(aCAS, xmlOut.getContentHandler());
}
 
Example #25
Source File: MetaDataObject_impl.java    From uima-uimaj with Apache License 2.0 2 votes vote down vote up
/**
 * Writes this object's XML representation to a byte stream.
 * <p>
 * The stream is wrapped in a new {@link XMLSerializer} and the work is handed to the
 * serializer-based overload.
 * 
 * @param aOutputStream
 *          an OutputStream to which the XML string will be written
 * @throws SAXException
 *           propagated from the serializer-based overload
 * @throws IOException
 *           propagated from the serializer-based overload
 */
public void toXML(OutputStream aOutputStream) throws SAXException, IOException {
  XMLSerializer streamSerializer = new XMLSerializer(aOutputStream);
  toXML(streamSerializer);
}
 
Example #26
Source File: MetaDataObject_impl.java    From uima-uimaj with Apache License 2.0 2 votes vote down vote up
/**
 * Writes this object's XML representation to a character stream.
 * <p>
 * The writer is wrapped in a new {@link XMLSerializer} and the work is handed to the
 * serializer-based overload.
 * 
 * @param aWriter
 *          a Writer to which the XML string will be written
 * @throws SAXException
 *           propagated from the serializer-based overload
 * @throws IOException
 *           propagated from the serializer-based overload
 */
public void toXML(Writer aWriter) throws SAXException, IOException {
  XMLSerializer writerSerializer = new XMLSerializer(aWriter);
  toXML(writerSerializer);
}
 
Example #27
Source File: TypeSystem2Xml.java    From uima-uimaj with Apache License 2.0 2 votes vote down vote up
/**
 * Writes a TypeSystem object as XML to an output stream. Built-in types and array types are
 * not included.
 * <p>
 * The stream is wrapped in a new {@link XMLSerializer}; its content handler is passed to the
 * content-handler-based overload, which does the conversion.
 * 
 * @param aTypeSystem
 *          the TypeSystem to convert
 * @param aOutputStream
 *          the stream to which XML output will be written
 * 
 * @throws IOException
 *           if there is a problem writing to the provided OutputStream
 * @throws SAXException
 *           if an error occurs during the translation of the type system to XML
 */
public static void typeSystem2Xml(TypeSystem aTypeSystem, OutputStream aOutputStream)
        throws SAXException, IOException {
  typeSystem2Xml(aTypeSystem, new XMLSerializer(aOutputStream).getContentHandler());
}