Java Code Examples for org.apache.uima.cas.CAS#reset()

The following examples show how to use org.apache.uima.cas.CAS#reset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CasIOUtilsTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Saves the shared {@code cas} to a file in the given format, loads the file back and checks
 * that the reported format and the loaded content are as expected.
 *
 * @param format
 *          serialization format to save with; the loader must report the same format
 * @param fileEnding
 *          extension appended to {@code simpleCas.} to form the file name
 * @param leniently
 *          if {@code true}, load into {@code cas2} with {@code CasLoadMode.LENIENT}; otherwise
 *          load back into {@code cas} with {@code CasLoadMode.DEFAULT}
 * @throws Exception
 *           if saving or loading fails
 */
private void testFormat(SerialFormat format, String fileEnding, boolean leniently) throws Exception {
  File casFile = new File("target/temp-test-output/simpleCas."+ fileEnding);
  casFile.getParentFile().mkdirs();
  // try-with-resources: the stream is closed even when saving throws
  try (FileOutputStream docOS = new FileOutputStream(casFile)) {
    CasIOUtils.save(cas, docOS, format);
  }

  // Use a CAS initialized with the "correct" type system or with a different type system?
  CAS casToUse = leniently ? cas2 : cas;
  casToUse.reset();

  SerialFormat loadedFormat;
  try (FileInputStream casInputStream = new FileInputStream(casFile)) {
    loadedFormat = CasIOUtils.load(casInputStream, null, casToUse,
        leniently ? CasLoadMode.LENIENT : CasLoadMode.DEFAULT);
  }
  Assert.assertEquals(format, loadedFormat);
  assertCorrectlyLoaded(casToUse, leniently);
}
 
Example 2
Source File: AnalysisEngine_implTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Auxiliary method used by testProcess(). Exercises both forms of the process method on the
 * given engine and checks what TestAnnotator recorded. The engine is destroyed before this
 * method returns, whether or not the checks pass.
 * 
 * @param ae
 *          analysis engine to test; destroyed by this method
 * @param tcas
 *          CAS used for both process calls; reset after each call
 * @param resultSpec
 *          result specification passed to process(CAS, ResultSpecification)
 * @param expectedLastResultSpec
 *          value TestAnnotator.lastResultSpec is expected to hold after the second call
 * @throws UIMAException
 *           if processing fails
 */
protected void _testProcessInner(AnalysisEngine ae, CAS tcas, ResultSpecification resultSpec,
    ResultSpecification expectedLastResultSpec) throws UIMAException {
  try {
    // Test each form of the process method. When TestAnnotator executes, it
    // stores in static fields the document text and the ResultSpecification.
    // We use these to make sure the information propagates correctly to the annotator.

    // process(CAS)
    //   Calls with the Result spec set to default to that of the outer annotator output capabilities
    tcas.setDocumentText("new test");
    ae.process(tcas);
    assertEquals("new test", TestAnnotator.lastDocument);
    tcas.reset();

    // process(CAS,ResultSpecification)
    tcas.setDocumentText("testing...");
    ae.process(tcas, resultSpec);
    assertEquals("testing...", TestAnnotator.lastDocument);
    assertEquals(expectedLastResultSpec, TestAnnotator.lastResultSpec);
    tcas.reset();
  } finally {
    // release the engine even when a process call or an assertion above fails
    ae.destroy();
  }
}
 
Example 3
Source File: ExampleApplication.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Runs one file through the analysis engine and prints the resulting annotations to System.out.
 * The CAS is reset before returning so that it can be reused.
 * 
 * @param aFile
 *          file whose (trimmed) contents become the document text
 * @param aAE
 *          Analysis Engine that processes the document
 * @param aCAS
 *          CAS that holds the document and the analysis results
 */
private static void processFile(File aFile, AnalysisEngine aAE, CAS aCAS) throws IOException,
        AnalysisEngineProcessException {
  System.out.println("Processing file " + aFile.getName());

  // read the file and use its contents, minus surrounding whitespace, as the document text
  final String trimmedText = FileUtils.file2String(aFile).trim();
  aCAS.setDocumentText(trimmedText);

  // analyze, then dump the annotations
  aAE.process(aCAS);
  PrintAnnotations.printAnnotations(aCAS, System.out);

  // empty the CAS so it is ready for the next document
  aCAS.reset();
}
 
Example 4
Source File: BratAnnotatorUtility.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new CAS that keeps the type system, document text, document language, tokens and
 * sentences of {@code aCas} but none of its other annotations.
 *
 * @param aCas
 *            the CAS to copy the essential information from
 * @return a new CAS holding only the document text/language, token and sentence boundaries
 *         (plus the document metadata when {@code aCas} has it)
 * @throws IOException
 *             if the target CAS cannot be created (wraps the underlying UIMAException)
 */
public static CAS clearAnnotations(CAS aCas)
    throws IOException
{
    CAS target;
    try {
        target = CasFactory.createCas((TypeSystemDescription) null);
    }
    catch (UIMAException e) {
        throw new IOException(e);
    }
    
    // Copy the CAS - basically we do this just to keep the full type system information
    CASCompleteSerializer serializer = serializeCASComplete((CASImpl) getRealCas(aCas));
    deserializeCASComplete(serializer, (CASImpl) getRealCas(target));

    // Remove all annotations from the target CAS but we keep the type system!
    target.reset();
    
    // Copy over essential information
    if (exists(aCas, getType(aCas, DocumentMetaData.class))) {
        copyDocumentMetadata(aCas, target);
    }
    else {
        // NOTE(review): this creates the metadata in the source CAS (aCas), not in target, so
        // target appears to end up without document metadata on this path - confirm intent.
        WebAnnoCasUtil.createDocumentMetadata(aCas);
    }
    target.setDocumentLanguage(aCas.getDocumentLanguage()); // DKPro Core Issue 435
    target.setDocumentText(aCas.getDocumentText());
    
    // Transfer token boundaries
    for (AnnotationFS t : selectTokens(aCas)) {
        target.addFsToIndexes(createToken(target, t.getBegin(), t.getEnd()));
    }

    // Transfer sentence boundaries
    for (AnnotationFS s : selectSentences(aCas)) {
        target.addFsToIndexes(createSentence(target, s.getBegin(), s.getEnd()));
    }

    return target;
}
 
Example 5
Source File: CasIOUtilsTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the shared {@code cas} to an XCAS file, loads that file back through its URL and
 * checks the loaded content.
 *
 * @param leniently
 *          if {@code true}, load into {@code cas2} with {@code CasLoadMode.LENIENT}; otherwise
 *          load back into {@code cas} with {@code CasLoadMode.DEFAULT}
 * @throws Exception
 *           if saving or loading fails
 */
public void testXCAS(boolean leniently) throws Exception {
  final File xcasFile = new File("target/temp-test-output/simpleCas.xcas");
  xcasFile.getParentFile().mkdirs();
  try (FileOutputStream out = new FileOutputStream(xcasFile)) {
    CasIOUtils.save(cas, out, SerialFormat.XCAS);
  }

  // Pick the target CAS (same or different type system) together with the matching load mode
  final CAS targetCas;
  final CasLoadMode loadMode;
  if (leniently) {
    targetCas = cas2;
    loadMode = CasLoadMode.LENIENT;
  } else {
    targetCas = cas;
    loadMode = CasLoadMode.DEFAULT;
  }

  targetCas.reset();
  CasIOUtils.load(xcasFile.toURI().toURL(), null, targetCas, loadMode);
  assertCorrectlyLoaded(targetCas, leniently);
}
 
Example 6
Source File: AnalysisEnginePoolTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Auxiliary method used by testProcess(). Checks an engine out of the pool, runs both forms of
 * the process method on it and always returns the engine to the pool.
 * 
 * @param aPool
 *          pool to take the analysis engine from
 * @param i
 *          not used by this method
 */
protected void _testProcess(AnalysisEnginePool aPool, int i)
        throws UIMAException {
  final AnalysisEngine engine = aPool.getAnalysisEngine(0);
  try {
    // Exercise each form of the process method. TestAnnotator records the document
    // text and the ResultSpecification in static fields when it runs.

    // form 1: process(CAS)
    final CAS workCas = engine.newCAS();
    mLastTypeSystem = workCas.getTypeSystem();
    workCas.setDocumentText("new test");
    engine.process(workCas);
    workCas.reset();

    // form 2: process(CAS,ResultSpecification)
    final ResultSpecification namedEntitySpec =
            new ResultSpecification_impl(workCas.getTypeSystem());
    namedEntitySpec.addResultType("NamedEntity", true);

    workCas.setDocumentText("testing...");
    engine.process(workCas, namedEntitySpec);
    workCas.reset();
  } finally {
    aPool.releaseAnalysisEngine(engine);
  }
}
 
Example 7
Source File: BinaryCasSerDesPerformance.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Measures the accumulated time for binary deserialization and compressed re-serialization of
 * every file in the data directory (except {@code typesystem.xml}), repeated 10 times, and
 * prints both totals. Does nothing when the data directory does not exist.
 *
 * @throws Exception
 *           if the type system cannot be parsed or a file cannot be (de)serialized
 */
public void testBinaryCasDeserialization6Performance() throws Exception {

    File dir = new File("" /*"/au/t/data/bin-compr-6/shakespeare.txt_40_processed"*/);

    if (!dir.exists()) return;

    File typeSystemFile = new File(dir, "typesystem.xml");
    XMLInputSource in = new XMLInputSource(typeSystemFile);
    TypeSystemDescription typeSystemDescription = UIMAFramework.getXMLParser().parseTypeSystemDescription(in);
    CAS cas = CasCreationUtils.createCas(typeSystemDescription, null, null);

    // listFiles() returns null when dir is not a directory or cannot be read
    File[] files = dir.listFiles();
    if (files == null) return;

    long accumDeser = 0;
    long accumSer = 0;
    for (int i = 0; i < 10; i++) {
      for (final File f : files) {
        if (f.getName().equals("typesystem.xml")) {
          continue;
        }
        cas.reset();
        // try-with-resources: close each input file instead of leaking one handle per iteration
        try (InputStream inputStream = new BufferedInputStream(new FileInputStream(f))) {
          long deserStart = System.nanoTime();
          Serialization.deserializeCAS(cas, inputStream);
          accumDeser += System.nanoTime() - deserStart;
        }

        ByteArrayOutputStream baos = new ByteArrayOutputStream(1024*512);
        long serStart = System.nanoTime();
        Serialization.serializeWithCompression(cas, baos, cas.getTypeSystem());
        accumSer += System.nanoTime() - serStart;
      }
    }
    System.out.format("Time to deserialize all files was %,d milliseconds%n", accumDeser / 1000000);
    System.out.format("Time to serialize   all files was %,d milliseconds%n", accumSer / 1000000);
}
 
Example 8
Source File: XmiCasDeserializerTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a complex CAS survives an XMI round trip through a CAS that lacks (part of) the
 * type system, as long as the out-of-type-system data is carried along in the shared data.
 *
 * @throws Exception
 *           if reading, serializing or deserializing fails
 */
public void testOutOfTypeSystemDataComplexCas() throws Exception {
   // deserialize a complex XCAS
   CAS originalCas = CasCreationUtils.createCas(typeSystem, null, indexes);
   // try-with-resources: the stream is closed even if deserialization throws
   try (InputStream serCasStream = new FileInputStream(JUnitExtension.getFile("ExampleCas/cas.xml"))) {
     XCASDeserializer.deserialize(serCasStream, originalCas);
   }
   
   //serialize to XMI
   String xmiStr = serialize(originalCas, null);
   
   //deserialize into a CAS with no type system
   CAS casWithNoTs = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
           new TypePriorities_impl(), new FsIndexDescription[0]);
   XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
   deserialize(xmiStr, casWithNoTs, sharedData, true, -1);
       
   // now reserialize including OutOfTypeSystem data
   String xmiStr2 = serialize(casWithNoTs, sharedData);
   
   //deserialize into a new CAS that has the full type system
   CAS newCas = CasCreationUtils.createCas(typeSystem, null, indexes);
   deserialize(xmiStr2, newCas, null, false, -1);
   
   //compare
   CasComparer.assertEquals(originalCas, newCas);
   
   //Test a partial type system with some missing features and
   //a missing "Organization" type
   File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
   TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
           new XMLInputSource(partialTypeSystemFile));
   CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes);
   XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
   deserialize(xmiStr, partialTsCas, sharedData2, true, -1);
       
   String xmiStr3 = serialize(partialTsCas, sharedData2);
   // reuse newCas for the second comparison; reset() empties it first
   newCas.reset();
   deserialize(xmiStr3, newCas, null, false, -1);
   CasComparer.assertEquals(originalCas, newCas);    
}
 
Example 9
Source File: MultiprocessingAnalysisEngine_implTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Auxiliary method used by testProcess(). Creates a multiprocessing analysis engine from the
 * given description, runs both forms of the process method and checks what TestAnnotator
 * recorded.
 * 
 * @param aTaeDesc
 *          description of the analysis engine to test
 * @param i
 *          thread identifier for multithreaded testing
 */
protected void _testProcess(AnalysisEngineDescription aTaeDesc, int i) throws UIMAException {
  // set up the engine under test
  final MultiprocessingAnalysisEngine_impl engine = new MultiprocessingAnalysisEngine_impl();
  engine.initialize(aTaeDesc, null);

  // Exercise each form of the process method. TestAnnotator records the document
  // text and the ResultSpecification in static fields when it runs, which lets us
  // verify that the information reaches the annotator.

  // form 1: process(CAS)
  final CAS workCas = engine.newCAS();
  workCas.setDocumentText("new test");
  engine.process(workCas);
  assertEquals("new test", TestAnnotator.lastDocument);
  workCas.reset();

  // form 2: process(CAS,ResultSpecification)
  final ResultSpecification namedEntitySpec =
          new ResultSpecification_impl(workCas.getTypeSystem());
  namedEntitySpec.addResultType("NamedEntity", true);

  workCas.setDocumentText("testing...");
  engine.process(workCas, namedEntitySpec);
  assertEquals("testing...", TestAnnotator.lastDocument);
  assertEquals(namedEntitySpec, TestAnnotator.lastResultSpec);
  workCas.reset();
}
 
Example 10
Source File: CasPool.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Checks in a CAS to the pool. This automatically calls the {@link CAS#reset()} method, to ensure
 * that when the CAS is later retrieved from the pool it will be ready to use. Also notifies other
 * Threads that may be waiting for an instance to become available.
 * 
 * Synchronized on the CAS to avoid the unnatural case where 
 * multiple threads attempt to return the same CAS to the pool
 * at the same time. 
 * 
 * If the CAS does not belong to this pool, or is already in the free list, a warning is logged
 * and the CAS is neither reset nor added to the free list.
 * 
 * @param aCas
 *          the Cas to release
 */
public void releaseCas(CAS aCas) {
  // note the pool stores references to the InitialView of each CAS
  aCas.setCurrentComponentInfo(null);  // https://issues.apache.org/jira/browse/UIMA-3655
  // look up the default-sofa view of the CAS; all pool bookkeeping below uses this view
  CAS cas = aCas.getView(CAS.NAME_DEFAULT_SOFA);

  // make sure this CAS actually belongs to this pool and is checked out
  // synchronize to avoid the same CAS being released on 2 threads
  synchronized (cas) {
    if (!mAllInstances.contains(cas) || mFreeInstances.contains(cas)) {
      UIMAFramework.getLogger(CLASS_NAME).logrb(Level.WARNING, CLASS_NAME.getName(), "releaseCas",
              LOG_RESOURCE_BUNDLE, "UIMA_return_cas_to_pool__WARNING");
    } else {
      // restore the ClassLoader and unlock the CAS, since release() can be called 
      // from within a CAS Multiplier.
      ((CASImpl)cas).restoreClassLoaderUnlockCas(); 
      
      // reset CAS
      cas.reset();
      
      // Add the CAS to the end of the free instances List
      mFreeInstances.add(cas);
      permits.release();  // should follow adding cas back to mFreeInstances
    }
  }

  // Notify any threads waiting on this object
  // not needed by UIMA Core - other users may need.
  // (runs on every call, including the warning path above)
  synchronized (this) {
    notifyAll();
  }
}
 
Example 11
Source File: CPECasPool.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a CAS to the pool. The CAS is {@link CAS#reset() reset} first so that it is ready to
 * use when it is next handed out, and threads waiting on this pool are notified. A CAS that does
 * not belong to this pool, or that is already free, is rejected with a logged warning.
 * 
 * @param aCas
 *          the CAS to release
 */
public synchronized void releaseCas(CAS aCas) {
  // a CAS can only be checked in if the pool owns it and it is not already free
  final boolean checkedOut = mAllInstances.contains(aCas) && !mFreeInstances.contains(aCas);
  if (!checkedOut) {
    if (UIMAFramework.getLogger().isLoggable(Level.WARNING)) {
      UIMAFramework.getLogger(this.getClass()).logrb(Level.WARNING, this.getClass().getName(),
              "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE, "UIMA_CPM_invalid_checkin__WARNING",
              new Object[] { Thread.currentThread().getName() });
    }
    return;
  }

  // clear the CAS and append it to the free list
  aCas.reset();
  mFreeInstances.add(aCas);

  // drop it from the checked-out list, if it is tracked there
  final int checkedOutPos = checkedOutInstances.indexOf(aCas); // new code JC 05/11/2005
  if (checkedOutPos != -1) {
    checkedOutInstances.remove(checkedOutPos);
    if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) {
      final Object[] removedArgs = new Object[] { Thread.currentThread().getName(),
          String.valueOf(checkedOutInstances.size()) };
      UIMAFramework.getLogger(this.getClass()).logrb(Level.FINEST, this.getClass().getName(),
              "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
              "UIMA_CPM_removed_from_checkedout_list__FINEST", removedArgs);
    }
  }

  if (UIMAFramework.getLogger().isLoggable(Level.FINEST)) {
    final Object[] returnedArgs = new Object[] { Thread.currentThread().getName(),
        String.valueOf(checkedOutInstances.size()) };
    UIMAFramework.getLogger(this.getClass()).logrb(Level.FINEST, this.getClass().getName(),
            "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE, "UIMA_CPM_return_cas_to_pool__FINEST",
            returnedArgs);
  }

  // wake up threads waiting for a CAS to become available
  this.notifyAll();
}
 
Example 12
Source File: SimplePipeline.java    From uima-uimafit with Apache License 2.0 4 votes vote down vote up
/**
 * <p>
 * Runs a pipeline made of the given collection reader and sequence of analysis engines. Once
 * every CAS supplied by the reader has been processed, the method calls
 * {@link AnalysisEngine#collectionProcessComplete() collectionProcessComplete()} on the engines.
 * Note that {@link AnalysisEngine#destroy()} and {@link CollectionReader#destroy()} are
 * <b>NOT</b> called. The components were instantiated by the caller, so they must also be
 * managed (i.e. destroyed) by the caller.
 * </p>
 * <p>
 * External resources can only be shared between the reader and/or the analysis engines if the
 * reader/engines have been previously instantiated using a shared resource manager.
 * </p>
 * 
 * @param aResMgr
 *          a resource manager. Normally the same one used by the collection reader and analysis
 *          engines.
 * @param reader
 *          a collection reader
 * @param engines
 *          a sequence of analysis engines
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException 
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException 
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException 
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final ResourceManager aResMgr, final CollectionReader reader,
        final AnalysisEngine... engines) throws IOException, ResourceInitializationException,
        AnalysisEngineProcessException, CollectionException {
  // gather the metadata of the reader and of every engine; the CAS is created from all of it
  final List<ResourceMetaData> componentMetaData = new ArrayList<ResourceMetaData>();
  componentMetaData.add(reader.getMetaData());
  for (int idx = 0; idx < engines.length; idx++) {
    componentMetaData.add(engines[idx].getMetaData());
  }

  final CAS sharedCas = CasCreationUtils.createCas(componentMetaData, null, aResMgr);
  reader.typeSystemInit(sharedCas.getTypeSystem());

  // one CAS is reused for all documents: fill, process, reset
  while (reader.hasNext()) {
    reader.getNext(sharedCas);
    runPipeline(sharedCas, engines);
    sharedCas.reset();
  }

  collectionProcessComplete(engines);
}
 
Example 13
Source File: XCASDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a CAS with two views to an XCAS string and deserializes that string twice into
 * the same target CAS, resetting the target in between, to check that deserialization still
 * works after {@code reset()}.
 *
 * @throws Exception
 *           if CAS creation, serialization or parsing fails
 */
public void testMultipleSofas() throws Exception {
    /*************************************************
     * Make CAS with 2 sofas, initial and OtherSofa  *
     *                                               *
     * Add instance of TOP and index in both views   *
     *                                               *
     * Serialize to string "xml"                     *
     *                                               *
     * Deserialize from string                       *
     *************************************************/
    CAS cas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
    // set document text for the initial view
    cas.setDocumentText("This is a test");
    // create a new view and set its document text
    CAS cas2 = cas.createView("OtherSofa");
    cas2.setDocumentText("This is only a test");

    // Change this test to create an instance of TOP because you cannot add an annotation to other than 
    //   the view it is created in. https://issues.apache.org/jira/browse/UIMA-4099
    // create a TOP and add to index of both views
    Type topType = cas.getTypeSystem().getTopType();
    FeatureStructure aTOP = cas.createFS(topType);
    cas.getIndexRepository().addFS(aTOP);
    cas2.getIndexRepository().addFS(aTOP); 
    FSIterator<FeatureStructure> it = cas.getIndexRepository().getAllIndexedFS(topType);
    FSIterator<FeatureStructure> it2 = cas2.getIndexRepository().getAllIndexedFS(topType);
    // each view must yield exactly two indexed FSs: two next() calls succeed, then nothing is left
    it.next(); it.next();
    it2.next(); it2.next(); 
    assertFalse(it.hasNext());
    assertFalse(it2.hasNext());
     
    // serialize
    StringWriter sw = new StringWriter();
    XMLSerializer xmlSer = new XMLSerializer(sw, false);
    XCASSerializer xcasSer = new XCASSerializer(cas.getTypeSystem());
    xcasSer.serialize(cas, xmlSer.getContentHandler(), true);
    String xml = sw.getBuffer().toString();

    // deserialize into another CAS (repeat twice to check it still works after reset)
    CAS newCas = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(), indexes);
    for (int i = 0; i < 2; i++) {
      // fresh deserializer and SAX parser on every pass; only newCas is reused
      XCASDeserializer newDeser = new XCASDeserializer(newCas.getTypeSystem());
      ContentHandler newDeserHandler = newDeser.getXCASHandler(newCas);
      SAXParserFactory fact = SAXParserFactory.newInstance();
      SAXParser parser = fact.newSAXParser();
      XMLReader xmlReader = parser.getXMLReader();
      xmlReader.setContentHandler(newDeserHandler);
      xmlReader.parse(new InputSource(new StringReader(xml)));

      // check sofas
      assertEquals("This is a test", newCas.getDocumentText());
      CAS newCas2 = newCas.getView("OtherSofa");
      assertEquals("This is only a test", newCas2.getDocumentText());

      // check that annotation is still indexed in both views
      it = newCas.getIndexRepository().getAllIndexedFS(topType);
      it2 = newCas2.getIndexRepository().getAllIndexedFS(topType);
      // same shape as before serialization: exactly two indexed FSs per view
      it.next(); it.next();
      it2.next(); it2.next(); 
      assertFalse(it.hasNext());
      assertFalse(it2.hasNext());
//      assertTrue(tIndex.size() == 2); // document annot and this one
//      assertTrue(t2Index.size() == 2); // ditto
      newCas.reset();  // testing if works after cas reset, go around loop 2nd time
    }
  }
 
Example 14
Source File: XmiCasDeserializerTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that data which does not fit the type system of the deserializing CAS is captured in
 * the {@code XmiSerializationSharedData} and written out again on re-serialization, both for
 * a CAS with an empty type system and for one with a partial type system.
 *
 * @throws Exception
 *           if reading, serializing or deserializing fails
 */
public void testOutOfTypeSystemData() throws Exception {
   // deserialize a simple XMI into a CAS with no TypeSystem    
   CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(),
           new TypePriorities_impl(), new FsIndexDescription[0]);
   File xmiFile = JUnitExtension.getFile("ExampleCas/simpleCas.xmi");
   String xmiStr = FileUtils.file2String(xmiFile, "UTF-8");
   
   XmiSerializationSharedData sharedData = new XmiSerializationSharedData();
   deserialize(xmiStr, cas, sharedData, true, -1);
   
   //do some checks on the out-of-type system data
   // only the sizes are inspected, so an unbounded wildcard avoids raw types
   List<?> ootsElems = sharedData.getOutOfTypeSystemElements();
   assertEquals(9, ootsElems.size());
   List<?> ootsViewMembers = sharedData.getOutOfTypeSystemViewMembers("1");
   assertEquals(7, ootsViewMembers.size());
   
   // now reserialize including OutOfTypeSystem data
   String xmiStr2 = serialize(cas, sharedData);
   
   //deserialize both original and new XMI into CASes that do have the full typesystem
   CAS newCas1 = CasCreationUtils.createCas(typeSystem, null, indexes);
   TypeSystem ts = newCas1.getTypeSystem();
   deserialize(xmiStr, newCas1, null, false, -1);
   CAS newCas2 = CasCreationUtils.createCas(ts, null, indexes, null);
   deserialize(xmiStr2, newCas2, null, false, -1);
   CasComparer.assertEquals(newCas1, newCas2);  
   
   //Test a partial type system with some missing features and
   //a missing "Organization" type
   File partialTypeSystemFile = JUnitExtension.getFile("ExampleCas/partialTestTypeSystem.xml");
   TypeSystemDescription partialTypeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
           new XMLInputSource(partialTypeSystemFile));
   CAS partialTsCas = CasCreationUtils.createCas(partialTypeSystem, null, indexes);
   XmiSerializationSharedData sharedData2 = new XmiSerializationSharedData();
   deserialize(xmiStr, partialTsCas, sharedData2, true, -1);
   
   assertEquals(1,sharedData2.getOutOfTypeSystemElements().size());
   // out-of-type-system feature recorded as an XML attribute
   OotsElementData ootsFeats3 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(3));
   assertEquals(1, ootsFeats3.attributes.size());
   XmlAttribute ootsAttr = ootsFeats3.attributes.get(0);
   assertEquals("mentionType", ootsAttr.name);
   assertEquals("NAME", ootsAttr.value);
   // same feature name and value, but recorded as a child element
   OotsElementData ootsFeats5 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(5));
   assertEquals(0, ootsFeats5.attributes.size());
   assertEquals(1, ootsFeats5.childElements.size());
   XmlElementNameAndContents ootsChildElem = ootsFeats5.childElements.get(0);
   assertEquals("mentionType", ootsChildElem.name.qName);
   assertEquals("NAME", ootsChildElem.contents);
   
   OotsElementData ootsFeats8 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(8));
   assertEquals(1, ootsFeats8.attributes.size());
   OotsElementData ootsFeats10 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(10));
   assertEquals(1, ootsFeats10.attributes.size());
   OotsElementData ootsFeats11 = sharedData2.getOutOfTypeSystemFeatures(sharedData2.getFsForXmiId(11));
   assertEquals(4, ootsFeats11.childElements.size());
   
   String xmiStr3 = serialize(partialTsCas, sharedData2);
   // reuse newCas2 for the second comparison; reset() empties it first
   newCas2.reset();
   deserialize(xmiStr3, newCas2, null, false, -1);
   CasComparer.assertEquals(newCas1, newCas2);    
}
 
Example 15
Source File: MultiprocessingAnalysisEngine_implTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Thread body for the multithreaded process test. Each time the go signal is given, runs five
 * rounds of both forms of the process method with short random sleeps in between; any failure
 * is stored in {@code mFailure}. Exits when {@code MultiThreadUtils.wait4go} returns false.
 */
public void run() {
  
  while (true) {
   
    // block until the go signal; a false result ends this thread
    if (!MultiThreadUtils.wait4go(this)) {
      break;
    }

    try {
      
      Random r = new Random();
  
      // Test each form of the process method. When TestAnnotator executes, it
      // stores in static fields the document text and the ResultSpecification.
      // We use these to make sure the information propagates correctly to the 
      // annotator. (However, we can't check these until after the threads are
      // finished, as their state is nondeterministic during multithreaded
      // processing.)
  
      // process(CAS)
      for (int i = 0; i < 5; i++) {
        CAS tcas = mAE.newCAS();
        mLastTypeSystem = tcas.getTypeSystem();
        tcas.setDocumentText("new test");
        mAE.process(tcas);
        Thread.sleep(0, r.nextInt(1000));  // between 0 and 1 microseconds
        tcas.reset();

        // process(CAS,ResultSpecification)
        ResultSpecification resultSpec = new ResultSpecification_impl(tcas.getTypeSystem());
        resultSpec.addResultType("NamedEntity", true);

        tcas.setDocumentText("testing...");
        Thread.sleep(0, r.nextInt(1000));  // between 0 and 1 microseconds
        mAE.process(tcas, resultSpec);
        Thread.sleep(0, r.nextInt(1000));  // between 0 and 1 microseconds
        tcas.reset();
      }
    } catch (Throwable t) {
      t.printStackTrace();
      //can't cause unit test to fail by throwing exception from thread.
      //record the failure and the main thread will check for it later.
      mFailure = t;
    }
  }
}
 
Example 16
Source File: TreeParser.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Gets trees from text.
 * First a sentence segmenter is used to segment the training examples in to sentences.
 * Sentences are then turned in to trees and returned.
 *
 * This will also process sentences with the following label format:
 * {@code <YOURLABEL> some text </YOURLABEL>}
 *
 * This will allow you to iterate on and label sentences and label spans yourself.
 *
 * Sentences that yield no top level treebank node, or that contain a label not present in
 * {@code labels}, are skipped. Both pooled CASes are returned to the pool even when
 * processing fails.
 *
 * @param text the text to process
 * @param labels the allowed labels; they are lower-cased before use
 * @return the list of trees
 * @throws Exception if tokenizing or parsing fails
 */
public List<Tree> getTreesWithLabels(String text, List<String> labels) throws Exception {
    CAS c = pool.getCas();
    CAS c2 = null;
    try {
        c.setDocumentText(text);
        tokenizer.process(c);
        List<String> lowerCaseLabels = new ArrayList<>();
        for (String s : labels)
            lowerCaseLabels.add(s.toLowerCase());
        labels = lowerCaseLabels;

        List<Tree> ret = new ArrayList<>();
        c2 = pool.getCas();
        for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
            Pair<String, MultiDimensionalMap<Integer, Integer, String>> stringsWithLabels =
                            ContextLabelRetriever.stringWithLabels(sentence.getCoveredText(), tf);
            c2.setDocumentText(stringsWithLabels.getFirst());

            tokenizer.process(c2);
            parser.process(c2);

            //build the tree based on this
            List<TopTreebankNode> nodes = new ArrayList<>(JCasUtil.select(c2.getJCas(), TopTreebankNode.class));
            if (nodes.size() > 1) {
                log.warn("More than one top level node for a treebank parse. Only accepting first input node.");
            }

            else if (nodes.isEmpty()) {
                c2.reset();
                continue;
            }

            // skip sentences that carry a label the caller did not allow
            Collection<String> labels2 = stringsWithLabels.getSecond().values();
            Set<String> diff = SetUtils.difference(labels2, labels);
            if (!diff.isEmpty()) {
                log.warn("Found invalid sentence. Skipping");
                c2.reset();
                continue;
            }

            TopTreebankNode node = nodes.get(0);
            ret.add(TreeFactory.buildTree(node, stringsWithLabels, labels));
            // clear the scratch CAS before the next sentence
            c2.reset();
        }

        return ret;
    } finally {
        // always hand the CASes back, otherwise a failure above drains the pool
        if (c != null)
            pool.releaseCas(c);
        if (c2 != null)
            pool.releaseCas(c2);
    }
}
 
Example 17
Source File: TreeParser.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Gets trees from text.
 * First a sentence segmenter is used to segment the training examples in to sentences.
 * Sentences are then turned in to trees and returned.
 *
 * This will also process sentences with the following label format:
 * {@code <YOURLABEL> some text </YOURLABEL>}
 *
 * This will allow you to iterate on and label sentences and label spans yourself.
 *
 * The text is wrapped in {@code <label> ... </label>} before processing. Sentences that are
 * empty, yield no top level treebank node, or fail to parse are skipped (parse failures are
 * logged). Both pooled CASes are returned to the pool even when processing fails.
 *
 * @param text the text to process
 * @param label the label for the whole sentence
 * @param labels the possible labels for the sentence; they are lower-cased before use
 * @return the list of trees; empty when {@code text} is empty
 * @throws Exception if tokenizing the labelled text fails
 */
public List<Tree> getTreesWithLabels(String text, String label, List<String> labels) throws Exception {
    if (text.isEmpty())
        return new ArrayList<>();
    CAS c = pool.getCas();
    CAS c2 = null;
    try {
        c.setDocumentText("<" + label + "> " + text + " </" + label + ">");
        tokenizer.process(c);
        List<String> lowerCaseLabels = new ArrayList<>();
        for (String s : labels)
            lowerCaseLabels.add(s.toLowerCase());
        labels = lowerCaseLabels;

        List<Tree> ret = new ArrayList<>();
        c2 = pool.getCas();
        for (Sentence sentence : JCasUtil.select(c.getJCas(), Sentence.class)) {
            if (sentence.getCoveredText().isEmpty())
                continue;

            try {
                Pair<String, MultiDimensionalMap<Integer, Integer, String>> stringsWithLabels =
                                ContextLabelRetriever.stringWithLabels(sentence.getCoveredText(), tf);
                c2.setDocumentText(stringsWithLabels.getFirst());
                tokenizer.process(c2);
                parser.process(c2);

                //build the tree based on this
                List<TopTreebankNode> nodes = new ArrayList<>(JCasUtil.select(c2.getJCas(), TopTreebankNode.class));
                if (nodes.size() > 1) {
                    log.warn("More than one top level node for a treebank parse. Only accepting first input node.");
                }

                else if (nodes.isEmpty()) {
                    c2.reset();
                    continue;
                }

                TopTreebankNode node = nodes.get(0);
                ret.add(TreeFactory.buildTree(node, stringsWithLabels, labels));
                c2.reset();

            } catch (Exception e) {
                // best effort: skip the sentence, but keep the cause in the log
                log.warn("Unable to parse " + sentence.getCoveredText(), e);
                c2.reset();
                continue;
            }
        }

        return ret;
    } finally {
        // always hand the CASes back, otherwise a failure above drains the pool
        if (c != null)
            pool.releaseCas(c);
        if (c2 != null)
            pool.releaseCas(c2);
    }
}
 
Example 18
Source File: MultiprocessingAnalysisEngine_implTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
public void run() {
      final Random jitter = new Random();

      // Each pass blocks until the go signal arrives (sent once all threads are set up).
      // A false return from wait4go means it is time to terminate.
      while (MultiThreadUtils.wait4go(this)) {
        try {
          // Exercise each form of the process method. When TestAnnotator executes, it
          // stores the document text and the ResultSpecification in static fields, which
          // are used to verify that the information propagates correctly to the annotator.
          // (Those fields can only be checked after all threads have finished, because
          // their state is nondeterministic during multithreaded processing.)

          // --- process(CAS) ---
          CAS cas = mAE.newCAS();
          mLastTypeSystem = cas.getTypeSystem();
          cas.setDocumentText("new test");
          mAE.process(cas);
          if (doSleeps) {
            Thread.sleep(0, jitter.nextInt(1000)); // 0 to 1 microseconds
          }
          cas.reset();

          // --- process(CAS, ResultSpecification) ---
          ResultSpecification spec = new ResultSpecification_impl(cas.getTypeSystem());
          spec.addResultType("NamedEntity", true);

          cas.setDocumentText("testing...");
          if (doSleeps) {
            Thread.sleep(0, jitter.nextInt(1000)); // 0 to 1 microseconds
          }
          mAE.process(cas, spec);
          if (doSleeps) {
            Thread.sleep(0, jitter.nextInt(1000)); // 0 to 1 microseconds
          }
          cas.reset();
        } catch (Throwable failure) {
          failure.printStackTrace();
          // An exception thrown from this thread cannot make the unit test fail.
          // Record the failure instead; the main thread will check for it later.
          mFailure = failure;
        }
      }
    }
 
Example 19
Source File: AnalysisEngine_implTest.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
public void testProcess() throws Exception {
    try {
      final String testAnnotatorClass = "org.apache.uima.analysis_engine.impl.TestAnnotator";

      // Scenario 1: simple primitive TextAnalysisEngine (using the TestAnnotator class),
      // deliberately WITHOUT a type system description. The test should work with or
      // without one.
      AnalysisEngineDescription untypedDesc = new AnalysisEngineDescription_impl();
      untypedDesc.setPrimitive(true);
      untypedDesc.setAnnotatorImplementationName(testAnnotatorClass);
      untypedDesc.getMetaData().setName("Test Primitive TAE");

      Capability untypedCap = new Capability_impl();
      untypedCap.addOutputType("NamedEntity", true);
      untypedCap.addOutputType("DocumentStructure", true);
      untypedDesc.getAnalysisEngineMetaData().setCapabilities(new Capability[] { untypedCap });
      _testProcess(untypedDesc);

      // Scenario 2: the same primitive engine, this time WITH a type system description
      // declaring the two output types.
      AnalysisEngineDescription typedDesc = new AnalysisEngineDescription_impl();
      typedDesc.setPrimitive(true);
      typedDesc.setAnnotatorImplementationName(testAnnotatorClass);
      typedDesc.getMetaData().setName("Test Primitive TAE");

      TypeSystemDescription typeSystem = new TypeSystemDescription_impl();
      typeSystem.addType("NamedEntity", "", "uima.tcas.Annotation");
      typeSystem.addType("DocumentStructure", "", "uima.cas.TOP");
      typedDesc.getAnalysisEngineMetaData().setTypeSystem(typeSystem);

      Capability typedCap = new Capability_impl();
      typedCap.addOutputType("NamedEntity", true);
      typedCap.addOutputType("DocumentStructure", true);
      Capability[] typedCaps = new Capability[] { typedCap };
      typedDesc.getAnalysisEngineMetaData().setCapabilities(typedCaps);
      _testProcess(typedDesc);

      // Scenario 3: simple aggregate TextAnalysisEngine whose single delegate is the
      // typed primitive from scenario 2; it reuses that engine's capabilities array.
      AnalysisEngineDescription aggregateDesc = new AnalysisEngineDescription_impl();
      aggregateDesc.setPrimitive(false);
      aggregateDesc.getMetaData().setName("Test Aggregate TAE");
      aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("Test", typedDesc);
      FixedFlow_impl fixedFlow = new FixedFlow_impl();
      fixedFlow.setFixedFlow(new String[] { "Test" });
      aggregateDesc.getAnalysisEngineMetaData().setFlowConstraints(fixedFlow);
      aggregateDesc.getAnalysisEngineMetaData().setCapabilities(typedCaps);
      _testProcess(aggregateDesc);

      // Scenario 4: aggregate TAE containing a CAS Consumer.
      File consumerOutput = JUnitExtension.getFile("CpmOutput.txt");
      if (consumerOutput != null && consumerOutput.exists()) {
        // File.delete() can't be relied upon, so truncate the file to zero length by
        // opening it for (non-append) writing and closing it immediately.
        new FileOutputStream(consumerOutput, false).close();
        assertEquals(0, consumerOutput.length());
      }

      AnalysisEngineDescription aggWithConsumerDesc =
              UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateTaeWithCasConsumer.xml")));

      _testProcess(aggWithConsumerDesc, new String[] { "en" });

      // Verify that the CAS Consumer ran: the output file must exist and be non-empty.
      if (null == consumerOutput) {
        // The file was not resolvable before the run; look it up again now.
        consumerOutput = JUnitExtension.getFile("CpmOutput.txt");
      }
      assertTrue(consumerOutput != null && consumerOutput.exists());
      assertTrue(consumerOutput.length() > 0);
      consumerOutput.delete();

      // Scenario 5: aggregate that uses ParallelStep; both annotators must have seen
      // the document text.
      AnalysisEngineDescription parallelDesc =
              UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
                      new XMLInputSource(JUnitExtension
                              .getFile("TextAnalysisEngineImplTest/AggregateForParallelStepTest.xml")));
      AnalysisEngine parallelEngine = UIMAFramework.produceAnalysisEngine(parallelDesc);
      CAS parallelCas = parallelEngine.newCAS();
      parallelCas.setDocumentText("new test");
      parallelEngine.process(parallelCas);
      assertEquals("new test", TestAnnotator.lastDocument);
      assertEquals("new test", TestAnnotator2.lastDocument);
      parallelCas.reset();

    } catch (Exception e) {
      JUnitExtension.handleException(e);
    }
  }
 
Example 20
Source File: SimplePipeline.java    From uima-uimafit with Apache License 2.0 3 votes vote down vote up
/**
 * <p>
 * Runs the given CollectionReader and analysis engine descriptions as a pipeline. The
 * descriptions are combined into a single engine, every CAS supplied by the reader is pushed
 * through it, and the CAS is reset after each document so that one CAS instance is reused for
 * the whole run.
 * </p>
 * <p>
 * Once the reader is exhausted, {@link AnalysisEngine#collectionProcessComplete()
 * collectionProcessComplete()} is invoked on the engine; the engine is then destroyed
 * ({@link Resource#destroy() destroy()}) whether or not processing succeeded. The life-cycle
 * methods are <b>NOT</b> called on the reader: it was instantiated by the caller and must
 * therefore also be managed (i.e. destroyed) by the caller.
 * </p>
 * <p>
 * Note that with this method, external resources cannot be shared between the reader and the
 * analysis engines. They can be shared amongst the analysis engines.
 * </p>
 * <p>
 * The CAS is created from the merged metadata of the reader and the engine, using the resource
 * manager used by the collection reader.
 * </p>
 * 
 * @param reader
 *          The CollectionReader that loads the documents into the CAS.
 * @param descs
 *          Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix
 *          of primitive and aggregate engines, then please create the AnalysisEngines yourself
 *          and call the other runPipeline method.
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException 
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException 
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException 
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final CollectionReader reader,
        final AnalysisEngineDescription... descs) throws IOException,
        ResourceInitializationException, AnalysisEngineProcessException, CollectionException {
  // Stays null if engine creation fails; the finally block still passes it to destroy().
  AnalysisEngine engine = null;
  try {
    // Combine the descriptions into one and instantiate the resulting engine
    engine = createEngine(createEngineDescription(descs));

    // One CAS, built from the merged reader + engine metadata, serves every document
    final CAS sharedCas = CasCreationUtils.createCas(
            asList(reader.getMetaData(), engine.getMetaData()), null,
            reader.getResourceManager());
    reader.typeSystemInit(sharedCas.getTypeSystem());

    // Read, analyze, then clear the CAS for the next document
    while (reader.hasNext()) {
      reader.getNext(sharedCas);
      engine.process(sharedCas);
      sharedCas.reset();
    }

    // Tell the engine that the whole collection has been processed
    engine.collectionProcessComplete();
  } finally {
    // Always release the engine, even when processing failed
    LifeCycleUtil.destroy(engine);
  }
}