Java Code Examples for org.apache.uima.UIMAFramework#produceCollectionReader()

The following examples show how to use org.apache.uima.UIMAFramework#produceCollectionReader(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: InstallationTester.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Tries to produce a collection reader (CR) from the given specifier and records the outcome
 * in <code>status</code>. This method returns nothing; the result is reported only through
 * the status object: its return code is set to <code>TestStatus.TEST_SUCCESSFUL</code> when a
 * reader instance was obtained, otherwise to <code>TestStatus.TEST_NOT_SUCCESSFUL</code>
 * together with a localized message naming the main component id.
 *
 * @param specifier the resource specifier the reader is produced from
 * @param resource_manager the resource manager handed to the UIMA framework
 * @param status the place where to put the results
 *
 * @throws IOException
 *           declared for I/O failures while handling the specifier input.
 * @throws InvalidXMLException
 *           declared for specifier XML that cannot be parsed.
 * @throws ResourceInitializationException
 *           If the specified CR cannot be instantiated.
 */
private void testCollectionReader(ResourceSpecifier specifier,
        ResourceManager resource_manager, TestStatus status)
        throws IOException, InvalidXMLException, ResourceInitializationException {
  CollectionReader producedReader =
          UIMAFramework.produceCollectionReader(specifier, resource_manager, null);

  if (producedReader == null) {
    // Flag the failure first, then attach a message identifying the affected component.
    status.setRetCode(TestStatus.TEST_NOT_SUCCESSFUL);
    Object[] messageArgs =
            new Object[] { this.pkgBrowser.getInstallationDescriptor().getMainComponentId() };
    status.setMessage(I18nUtil.localizeMessage(PEAR_MESSAGE_RESOURCE_BUNDLE,
            "installation_verification_cr_not_created", messageArgs, null));
    return;
  }

  status.setRetCode(TestStatus.TEST_SUCCESSFUL);
}
 
Example 2
Source File: Conll2003ReaderTcBmeow.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the superclass initialization and then instantiates the delegate collection reader.
 * The reader description is built from {@code readerClassName} and the configuration
 * parameters obtained from the given context, and the resulting reader is stored in the
 * {@code reader} field.
 *
 * @param aContext the UIMA context the configuration parameters are read from
 * @throws ResourceInitializationException if initialization or reader creation fails
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    // Collect the parameters before the description is built, as the description needs them.
    Object[] readerParams = getConfigurationParams(aContext);
    CollectionReaderDescription delegateDescription =
            createReaderDescription(readerClassName, readerParams);

    reader = UIMAFramework.produceCollectionReader(
            delegateDescription, getResourceManager(), null);
}
 
Example 3
Source File: Conll2003ReaderTc.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the superclass initialization and then instantiates the delegate collection reader.
 * The reader description is built from {@code readerClassName} and the configuration
 * parameters obtained from the given context, and the resulting reader is stored in the
 * {@code reader} field.
 *
 * @param aContext the UIMA context the configuration parameters are read from
 * @throws ResourceInitializationException if initialization or reader creation fails
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    // Collect the parameters before the description is built, as the description needs them.
    Object[] readerParams = getConfigurationParams(aContext);
    CollectionReaderDescription delegateDescription =
            createReaderDescription(readerClassName, readerParams);

    reader = UIMAFramework.produceCollectionReader(
            delegateDescription, getResourceManager(), null);
}
 
Example 4
Source File: PipelineBuilder.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Create a new Collection Reader from {@code collectionReaderConfig}.
 *
 * <p>The reader class name and its parameters are read from the configuration, the class is
 * resolved against the default reader package, its resources are obtained via
 * {@code getOrCreateResources}, and the merged parameters are used to build the reader
 * description that is handed to the UIMA framework.
 *
 * @return the instantiated collection reader
 * @throws BaleenException if no reader class is configured, or if the reader could not be
 *     initialized
 */
private CollectionReader createCollectionReader() throws BaleenException {
  final String readerClassName = BuilderUtils.getClassNameFromConfig(collectionReaderConfig);
  final Map<String, Object> flattenedParams =
      BuilderUtils.flattenConfig(null, BuilderUtils.getParamsFromConfig(collectionReaderConfig));

  if (readerClassName == null || readerClassName.isEmpty()) {
    throw new InvalidParameterException("Collection Reader class not specified");
  }

  // Fall back to an empty map so the merge below never receives null parameters.
  final Map<String, Object> readerParams =
      flattenedParams != null ? flattenedParams : Collections.<String, Object>emptyMap();

  try {
    final Class<? extends CollectionReader> readerClass =
        BuilderUtils.getClassFromString(readerClassName, getDefaultReaderPackage());
    final Map<String, ExternalResourceDescription> readerResources =
        getOrCreateResources(readerClass);
    final Object[] mergedParams =
        BuilderUtils.mergeAndExtractParams(
            globalConfig, readerParams, ignoreParams, readerResources);

    return UIMAFramework.produceCollectionReader(
        CollectionReaderFactory.createReaderDescription(readerClass, mergedParams),
        resourceManager,
        null);
  } catch (ResourceInitializationException e) {
    throw new BaleenException("Couldn't initialize collection reader", e);
  }
}
 
Example 5
Source File: SimpleRunCPM.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Builds and starts a simple collection processing run.
 * <p>
 * The constructor records the start time, validates the argument count (printing the usage
 * message and exiting with status 1 when fewer than three arguments are given), instantiates
 * a collection reader, an analysis engine and a CAS consumer from the descriptor files named
 * by the first three arguments, wires the engine and the consumer into a new Collection
 * Processing Manager and starts processing.
 *
 * @param args          command line arguments into the program - see class description
 * @throws UIMAException the UIMA exception
 * @throws IOException Signals that an I/O exception has occurred.
 */
public SimpleRunCPM(String[] args) throws UIMAException, IOException {
  mStartTime = System.currentTimeMillis();

  // At least three descriptor paths are needed: reader, analysis engine, CAS consumer.
  if (args.length < 3) {
    printUsageMessage();
    System.exit(1);
  }

  // --- Collection Reader (descriptor: args[0]) ---
  System.out.println("Initializing Collection Reader");
  ResourceSpecifier readerSpec =
          UIMAFramework.getXMLParser().parseCollectionReaderDescription(
                  new XMLInputSource(args[0]));
  CollectionReader reader = UIMAFramework.produceCollectionReader(readerSpec);

  // --- Analysis Engine (descriptor: args[1]) ---
  System.out.println("Initializing AnalysisEngine");
  ResourceSpecifier engineSpec =
          UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(args[1]));
  AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(engineSpec);

  // --- CAS Consumer (descriptor: args[2]) ---
  System.out.println("Initializing CAS Consumer");
  ResourceSpecifier casConsumerSpec =
          UIMAFramework.getXMLParser().parseCasConsumerDescription(
                  new XMLInputSource(args[2]));
  CasConsumer consumer = UIMAFramework.produceCasConsumer(casConsumerSpec);

  // Assemble the Collection Processing Manager from the components created above.
  mCPM = UIMAFramework.newCollectionProcessingManager();
  mCPM.setAnalysisEngine(engine);
  mCPM.addCasConsumer(consumer);

  // Register a listener for status callbacks.
  mCPM.addStatusCallbackListener(new StatusCallbackListenerImpl());

  // Do not pause the run when an exception occurs.
  mCPM.setPauseOnException(false);

  // Kick off processing (in batches of 10, just for testing purposes).
  mCPM.process(reader, 10);
}
 
Example 6
Source File: SimplePipeline.java    From uima-uimafit with Apache License 2.0 3 votes vote down vote up
/**
 * <p>
 * Run the CollectionReader and AnalysisEngines as a pipeline. Once every CAS provided by the
 * reader has been processed, {@link AnalysisEngine#collectionProcessComplete()
 * collectionProcessComplete()} is called on the engines. Whether or not processing succeeded,
 * the reader, the aggregate engine and the resource manager are then released through
 * {@code LifeCycleUtil.destroy}.
 * </p>
 * <p>
 * The reader and the analysis engines are created with the same resource manager, so external
 * resources can be shared between them.
 * </p>
 * <p>
 * This method is suitable for the batch-processing of sets of documents where the overhead
 * of instantiating the pipeline components does not significantly impact the overall runtime
 * of the pipeline. If you need to avoid this overhead, e.g. because you wish to run a pipeline
 * on individual documents, then you should not use this method. Instead, create a CAS using
 * {@link JCasFactory}, create a reader instance using {@link CollectionReaderFactory#createReader},
 * create an engine instance using {@link AnalysisEngineFactory#createEngine} and then use
 * a loop to process the data, resetting the CAS after each step.
 * </p>
 * <pre><code>
 *   while (reader.hasNext()) {
 *     reader.getNext(cas);
 *     engine.process(cas);
 *     cas.reset();
 *   }
 * </code></pre>
 * 
 * @param readerDesc
 *          The CollectionReader that loads the documents into the CAS.
 * @param descs
 *          Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix
 *          of primitive and aggregate engines, then please create the AnalysisEngines yourself
 *          and call the other runPipeline method.
 * @throws IOException
 *           if there is an I/O problem in the reader
 * @throws ResourceInitializationException 
 *           if there is a problem initializing or running the pipeline.
 * @throws CollectionException 
 *           if there is a problem initializing or running the pipeline.
 * @throws AnalysisEngineProcessException 
 *           if there is a problem initializing or running the pipeline.
 */
public static void runPipeline(final CollectionReaderDescription readerDesc,
        final AnalysisEngineDescription... descs) throws IOException,
        ResourceInitializationException, AnalysisEngineProcessException, CollectionException {
  // Declared outside the try block so the finally clause can release whatever was created.
  ResourceManager sharedResources = null;
  CollectionReader source = null;
  AnalysisEngine aggregate = null;
  try {
    sharedResources = ResourceManagerFactory.newResourceManager();

    // Instantiate the reader with the shared resource manager
    source = UIMAFramework.produceCollectionReader(readerDesc, sharedResources, null);

    // Combine the given descriptions into one aggregate description and instantiate it
    final AnalysisEngineDescription aggregateDesc = createEngineDescription(descs);
    aggregate = UIMAFramework.produceAnalysisEngine(aggregateDesc, sharedResources, null);

    // The CAS is built from the metadata of both the reader and the aggregate engine
    final CAS sharedCas = CasCreationUtils.createCas(
            asList(source.getMetaData(), aggregate.getMetaData()), null, sharedResources);
    source.typeSystemInit(sharedCas.getTypeSystem());

    // Feed every document through the aggregate, reusing the same CAS each time
    while (source.hasNext()) {
      source.getNext(sharedCas);
      aggregate.process(sharedCas);
      sharedCas.reset();
    }

    // Tell the engines that the whole collection has been processed
    aggregate.collectionProcessComplete();
  } finally {
    // Release components in the order reader, engine, resource manager
    LifeCycleUtil.destroy(source);
    LifeCycleUtil.destroy(aggregate);
    LifeCycleUtil.destroy(sharedResources);
  }
}
 
Example 7
Source File: CollectionReaderFactory.java    From uima-uimafit with Apache License 2.0 3 votes vote down vote up
/**
 * Instantiate a CollectionReader described by an XML descriptor file, applying the given
 * configuration parameters. The reader is produced with a newly created resource manager.
 * 
 * @param descriptorPath
 *          The path to the XML descriptor file.
 * @param configurationData
 *          Any additional configuration parameters to be set. These should be supplied as (name,
 *          value) pairs, so there should always be an even number of parameters.
 * @return The CollectionReader created from the XML descriptor and the configuration parameters.
 * @throws ResourceInitializationException
 *           if the descriptor could not be created or if the component could not be instantiated
 * @throws InvalidXMLException
 *           if the descriptor could not be created
 * @throws IOException
 *           if the descriptor could not be read
 */
public static CollectionReader createReaderFromPath(String descriptorPath,
        Object... configurationData)
        throws ResourceInitializationException, InvalidXMLException, IOException {
  // Build the description first; the resource manager is only created once that succeeded.
  final CollectionReaderDescription readerDescription =
          createReaderDescriptionFromPath(descriptorPath, configurationData);

  return UIMAFramework.produceCollectionReader(
          readerDescription, ResourceManagerFactory.newResourceManager(), null);
}
 
Example 8
Source File: CollectionReaderFactory.java    From uima-uimafit with Apache License 2.0 3 votes vote down vote up
/**
 * Get a CollectionReader from the name (Java-style, dotted) of an XML descriptor file, and a set
 * of configuration parameters. The name is resolved to a URL through an import that is looked
 * up with a newly created resource manager; the same resource manager is then used to produce
 * the reader.
 * 
 * @param descriptorName
 *          The fully qualified, Java-style, dotted name of the XML descriptor file.
 * @param configurationData
 *          Any additional configuration parameters to be set. These should be supplied as (name,
 *          value) pairs, so there should always be an even number of parameters.
 * @return The CollectionReader created from the XML descriptor and the configuration parameters.
 * @throws ResourceInitializationException 
 *           if the descriptor could not be created or if the component could not be instantiated
 * @throws InvalidXMLException 
 *           if the descriptor could not be created
 * @throws IOException
 *           if the descriptor could not be read
 */
public static CollectionReader createReader(String descriptorName, Object... configurationData)
        throws IOException, ResourceInitializationException, InvalidXMLException {
  final ResourceManager resourceManager = ResourceManagerFactory.newResourceManager();

  // Use a by-name import to turn the dotted descriptor name into an absolute URL.
  final Import descriptorImport = UIMAFramework.getResourceSpecifierFactory().createImport();
  descriptorImport.setName(descriptorName);
  final URL descriptorUrl = descriptorImport.findAbsoluteUrl(resourceManager);

  final ResourceSpecifier readerSpecifier =
          createResourceCreationSpecifier(descriptorUrl, configurationData);
  return UIMAFramework.produceCollectionReader(readerSpecifier, resourceManager, null);
}
 
Example 9
Source File: CollectionReaderFactory.java    From uima-uimafit with Apache License 2.0 3 votes vote down vote up
/**
 * Create a CollectionReader from a CollectionReaderDescription, applying additional
 * configuration parameter data as desired. The parameters are set on a clone of the given
 * description, and the reader is produced with a newly created resource manager.
 * 
 * @param desc
 *          a descriptor
 * @param configurationData
 *          configuration parameter data as name value pairs. Will override values already set in
 *          the description.
 * @return The CollectionReader created and initialized with the type system and configuration
 *         parameters.
 * @throws ResourceInitializationException
 *           if the component could not be initialized
 */
public static CollectionReader createReader(CollectionReaderDescription desc,
        Object... configurationData) throws ResourceInitializationException {
  // The parameters are applied to a copy, not to the description that was passed in.
  final CollectionReaderDescription configuredDesc = (CollectionReaderDescription) desc.clone();
  ResourceCreationSpecifierFactory.setConfigurationParameters(configuredDesc, configurationData);

  return UIMAFramework.produceCollectionReader(
          configuredDesc, ResourceManagerFactory.newResourceManager(), null);
}