org.apache.uima.collection.CollectionReaderDescription Java Examples

The following examples show how to use org.apache.uima.collection.CollectionReaderDescription. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FormatSupportDescription.java    From webanno with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the reader description for this format by loading the configured reader class by
 * name and handing it, together with the given type system, to the description factory.
 *
 * @param aTSD
 *            the type system description passed on to the reader description factory
 * @return the description created for the configured reader class
 * @throws ResourceInitializationException
 *             if the configured reader class cannot be found or the description cannot be
 *             created
 * @throws UnsupportedOperationException
 *             if this format reports that it is not readable
 */
@Override
@SuppressWarnings("unchecked")
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    // Formats without reading support are rejected before any class loading happens.
    if (!isReadable()) {
        throw new UnsupportedOperationException("The format [" + getName() + "] cannot be read");
    }

    try {
        // The cast is unchecked: nothing here verifies that the named class is a reader.
        Class<? extends CollectionReader> resolvedReader =
                (Class<? extends CollectionReader>) Class.forName(readerClass);
        return createReaderDescription(resolvedReader, aTSD);
    }
    catch (ClassNotFoundException e) {
        throw new ResourceInitializationException(e);
    }
}
 
Example #2
Source File: UimaFactoryInjectionTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Registers factory beans from the given application context with the resource factory of a
 * freshly instantiated {@code UIMAFramework_impl}.
 *
 * @param aApplicationContext
 *          the context from which the factory beans are looked up by type
 */
private static void initUimaApplicationContext(final ApplicationContext aApplicationContext) {
  // The registration lives in the instance initializer of an anonymous subclass, so it runs
  // as a side effect of constructing the object; the instance itself is discarded.
  new UIMAFramework_impl() {
    {
      CompositeResourceFactory_impl composite =
              (CompositeResourceFactory_impl) getResourceFactory();

      composite.registerFactory(
              CasConsumerDescription.class,
              aApplicationContext.getBean(CasConsumerFactory_impl.class));
      composite.registerFactory(
              CasInitializerDescription.class,
              aApplicationContext.getBean(CasInitializerFactory_impl.class));
      composite.registerFactory(
              CollectionReaderDescription.class,
              aApplicationContext.getBean(CollectionReaderFactory_impl.class));
      composite.registerFactory(
              ResourceCreationSpecifier.class,
              aApplicationContext.getBean(AnalysisEngineFactory_impl.class));
      composite.registerFactory(
              CustomResourceSpecifier.class,
              aApplicationContext.getBean(CustomResourceFactory_impl.class));
    }
  };
}
 
Example #3
Source File: CollectionReaderDescription_implTest.java    From uima-uimaj with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the test descriptor to XML, parses it back, and checks that the parsed descriptor
 * equals the original. Any exception is routed through {@code JUnitExtension.handleException}.
 */
public void testXMLization() throws Exception {
  try {
    // Step 1: serialize the descriptor into an in-memory XML string.
    StringWriter xmlOut = new StringWriter();
    mTestDesc.toXML(xmlOut);
    String xml = xmlOut.toString();

    // Step 2: parse the XML back into a descriptor.
    InputStream xmlIn = new ByteArrayInputStream(xml.getBytes(encoding));
    CollectionReaderDescription parsed = (CollectionReaderDescription) UIMAFramework
            .getXMLParser().parse(new XMLInputSource(xmlIn, null));

    // Step 3: the round trip must preserve equality.
    Assert.assertEquals(mTestDesc, parsed);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
 
Example #4
Source File: CasMultiplierTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Iterates a pipeline of three text incrementors followed by a consumer and checks, for each
 * yielded JCas, that both the consumer's recorded value and the document text match a counter
 * that starts at 4 and grows by one per document.
 */
@Ignore("UIMA-3470 not fixed yet")
@Test
public void testIteratePipelineOnText() throws Exception {
  CollectionReaderDescription source = createReaderDescription(Reader.class);
  AnalysisEngineDescription plusOne = createEngineDescription(TextIncrementor.class);
  AnalysisEngineDescription sink = createEngineDescription(Consumer.class);

  int expected = 4;
  for (JCas current : iteratePipeline(source, plusOne, plusOne, plusOne, sink)) {
    assertEquals(expected, Consumer.textResult);
    assertEquals(expected, Integer.parseInt(current.getDocumentText()));
    expected++;
  }
}
 
Example #5
Source File: AggregateCollectionReader.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an aggregate reader over the given readers.
 * <p>
 * A reader description for this class is built from the type system, the reader is
 * initialized with that description and its configuration parameter settings, and the first
 * reader of the list becomes the current reader.
 *
 * @param readers
 *            the readers to iterate over; the first element is fetched immediately, so the
 *            iterator's {@code next()} is called even if the list is empty
 * @param tsd
 *            the type system used to build the reader description
 * @throws RuntimeException
 *             wrapping any {@code ResourceInitializationException} raised during setup
 */
public AggregateCollectionReader(List<CollectionReader> readers,
        TypeSystemDescription tsd) {
    try {
        CollectionReaderDescription description = CollectionReaderFactory
                .createReaderDescription(AggregateCollectionReader.class, tsd);

        // Hand the description's own parameter settings to initialize().
        Map<String, Object> extraParams = new HashMap<String, Object>();
        extraParams.put(CollectionReader.PARAM_CONFIG_PARAM_SETTINGS,
                description.getMetaData().getConfigurationParameterSettings());
        initialize(description, extraParams);

        this.readers = readers;
        this.readerIterator = readers.iterator();
        this.currentReader = this.readerIterator.next();
    } catch (ResourceInitializationException rie) {
        throw new RuntimeException(rie);
    }
}
 
Example #6
Source File: Conll2003AidaReaderTest.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the AIDA collection reader over the small CoNLL-YAGO test file and dumps the resulting
 * CASes to {@code casdump.txt}.
 *
 * @param begin
 *            value passed as {@code PARAM_FIRSTDOCUMENT}
 * @param end
 *            value passed as {@code PARAM_LASTDOCUMENT}
 */
private void callReader(int begin, int end) throws NoSuchMethodException, MissingSettingException, IOException, ClassNotFoundException, UIMAException {
    CollectionReaderDescription aidaReader = Reader.getCollectionReaderDescription(
            Reader.COLLECTION_FORMAT.AIDA,
            PARAM_SOURCE_LOCATION, "src/test/resources/ner/test_collections/",
            PARAM_PATTERNS, "CoNLL-YAGO_ext_small_en.tsv", // 5 docs total
            PARAM_LANGUAGE, "en",
            PARAM_SINGLE_FILE, true,
            PARAM_FIRSTDOCUMENT, begin,
            PARAM_LASTDOCUMENT, end,
            PARAM_ORDER, OrderType.WORD_POS_POSITION_MENTION_ENTITY_TYPE);

    // The only engine in the pipeline is a writer that dumps each CAS.
    SimplePipeline.runPipeline(
            aidaReader,
            AnalysisEngineFactory.createEngineDescription(
                    CasDumpWriter.class,
                    PARAM_OUTPUT_FILE, "casdump.txt"));
}
 
Example #7
Source File: CasMultiplierTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Simulates a CPE containing CAS multipliers that each consume exactly one CAS and emit
 * exactly one CAS. This appears to work even though the CPE does not support CAS multipliers.
 */
@SuppressWarnings("javadoc")
@Test
public void testRunPipeline() throws Exception {
  CollectionReaderDescription source = createReaderDescription(Reader.class);
  AnalysisEngineDescription plusOne = createEngineDescription(Incrementor.class);
  AnalysisEngineDescription sink = createEngineDescription(Consumer.class);
  AnalysisEngineDescription aggregate = createEngineDescription(plusOne, plusOne, plusOne, sink);

  runPipeline(source, aggregate);

  // The consumer may see the CASes in any order, in particular because the CPE is never told
  // that the aggregate holding the consumer cannot be scaled out. Hence the sort below.
  boolean multipleDeploymentAllowed = aggregate.getAnalysisEngineMetaData()
          .getOperationalProperties().isMultipleDeploymentAllowed();
  assertFalse(multipleDeploymentAllowed);

  Collections.sort(Consumer.result);
  assertEquals(asList(4,5,6,7,8,9,10,11,12,13), Consumer.result);
}
 
Example #8
Source File: JCasIterator.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Iterate over the documents loaded by the given reader, running the analysis engines on each
 * one before yielding them. By default, components <b>DO get</b> life-cycle events, such as
 * collectionProcessComplete or destroy when this constructor is used.
 * 
 * @param aResMgr
 *          The {@link ResourceManager} used to create the components and the JCas. If this
 *          parameter is {@code null} then {@link ResourceManagerFactory#newResourceManager()}
 *          will be used to obtain a resource manager. If a new resource manager was internally
 *          created, it is destroyed at the end of the pipeline (if {@link #isSelfDestroy()}).
 * @param aReader
 *          The CollectionReader for loading documents.
 * @param aEngines
 *          The AnalysisEngines for processing documents.
 * @throws ResourceInitializationException
 *           if a failure occurs during initialization of the components
 * @throws CASException
 *           if the JCas could not be initialized
 */
public JCasIterator(final ResourceManager aResMgr, final CollectionReaderDescription aReader,
        final AnalysisEngineDescription... aEngines)
        throws CASException, ResourceInitializationException {
  selfComplete = true;
  selfDestroy = true;
  
  if (aResMgr == null) {
    resMgr = newResourceManager();
    resourceManagerCreatedInternally = true;
  }
  else {
    resMgr = aResMgr;
    resourceManagerCreatedInternally = false;
  }
  
  collectionReader = produceCollectionReader(aReader, resMgr, null);

  analysisEngines = new AnalysisEngine[] {
      produceAnalysisEngine(createEngineDescription(aEngines), resMgr, null) };
  
  jCas = createCas(resMgr, collectionReader, analysisEngines);
  collectionReader.typeSystemInit(jCas.getTypeSystem());
}
 
Example #9
Source File: CpePipeline.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Run the CollectionReader and AnalysisEngines as a multi-threaded pipeline and block until
 * the CPE reports that processing has finished.
 * 
 * @param parallelism
 *          Number of threads to use when running the analysis engines in the CPE. The value is
 *          passed to the CPE builder unchanged.
 * @param readerDesc
 *          The CollectionReader that loads the documents into the CAS.
 * @param descs
 *          Primitive AnalysisEngineDescriptions that process the CAS, in order. If you have a mix
 *          of primitive and aggregate engines, then please create the AnalysisEngines yourself
 *          and call the other runPipeline method.
 * @throws SAXException
 *           if there was a XML-related problem materializing the component descriptors that are
 *           referenced from the CPE descriptor
 * @throws IOException
 *           if there was a I/O-related problem materializing the component descriptors that are
 *           referenced from the CPE descriptor
 * @throws CpeDescriptorException
 *           if there was a problem configuring the CPE descriptor
 * @throws ResourceInitializationException 
 *           if there was a problem initializing or running the CPE.
 * @throws InvalidXMLException 
 *           if there was a problem initializing or running the CPE.
 * @throws AnalysisEngineProcessException 
 *           if there was a problem running the CPE; wraps the first exception recorded by the
 *           status listener.
 */
public static void runPipeline(final int parallelism,
        final CollectionReaderDescription readerDesc, final AnalysisEngineDescription... descs)
        throws SAXException, CpeDescriptorException, IOException, ResourceInitializationException,
        InvalidXMLException, AnalysisEngineProcessException {
  // Create AAE
  final AnalysisEngineDescription aaeDesc = createEngineDescription(descs);

  CpeBuilder builder = new CpeBuilder();
  builder.setReader(readerDesc);
  builder.setAnalysisEngine(aaeDesc);
  // Honor the caller-supplied thread count instead of deriving one from the CPU count.
  builder.setMaxProcessingUnitThreadCount(parallelism);

  StatusCallbackListenerImpl status = new StatusCallbackListenerImpl();
  CollectionProcessingEngine engine = builder.createCpe(status);

  engine.process();
  try {
    // Wait on the listener's monitor until it clears its processing flag.
    synchronized (status) {
      while (status.isProcessing) {
        status.wait();
      }
    }
  } catch (InterruptedException e) {
    // Stop waiting, but restore the interrupt flag so callers can still observe it.
    Thread.currentThread().interrupt();
  }

  if (status.exceptions.size() > 0) {
    throw new AnalysisEngineProcessException(status.exceptions.get(0));
  }
}
 
Example #10
Source File: MultiPageEditor.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the annotator implementation name and the framework implementation from the current
 * analysis engine description into the given collection reader description, then links the
 * common collection descriptors.
 *
 * @param d the collection reader description to update
 */
private void linkLocalProcessingDescriptorsFromAe(CollectionReaderDescription d) {
  String implementationName = aeDescription.getAnnotatorImplementationName();
  d.setImplementationName(implementationName);

  String frameworkImplementation = aeDescription.getFrameworkImplementation();
  d.setFrameworkImplementation(frameworkImplementation);

  linkCommonCollectionDescriptorsFromAe(d);
}
 
Example #11
Source File: Conll2003ReaderTcBmeow.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes this component and then produces the wrapped collection reader from the
 * configured reader class name and the configuration parameters derived from the context.
 *
 * @param aContext the UIMA context used for initialization and parameter lookup
 * @throws ResourceInitializationException if initialization or reader creation fails
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    Object[] readerParams = getConfigurationParams(aContext);
    CollectionReaderDescription wrappedDescription =
            createReaderDescription(readerClassName, readerParams);

    reader = UIMAFramework.produceCollectionReader(
            wrappedDescription, getResourceManager(), null);
}
 
Example #12
Source File: XMLParser_impl.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the input as a resource specifier and returns it as a collection reader description.
 *
 * @param aInput the XML input to parse
 * @param aOptions the parsing options passed on to the generic parse method
 * @return the parsed collection reader description
 * @throws InvalidXMLException if parsing fails or the parsed object is not a
 *         {@code CollectionReaderDescription}
 */
public CollectionReaderDescription parseCollectionReaderDescription(XMLInputSource aInput,
        ParsingOptions aOptions) throws InvalidXMLException {
  // Parse against the resource specifier namespace and schema.
  XMLizable parsed = parse(aInput, RESOURCE_SPECIFIER_NAMESPACE, SCHEMA_URL, aOptions);

  if (!(parsed instanceof CollectionReaderDescription)) {
    // Report the expected class name and the class that was actually parsed.
    Object[] messageArgs = {
        CollectionReaderDescription.class.getName(), parsed.getClass().getName() };
    throw new InvalidXMLException(InvalidXMLException.INVALID_CLASS, messageArgs);
  }
  return (CollectionReaderDescription) parsed;
}
 
Example #13
Source File: ManualEvaluation.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates one trained file in two pipeline runs: first the mention spans, then the tokens
 * and mentions. The results are written into a newly created evaluation directory.
 *
 * @param fileName name of the trained file, appended to the configured directory
 * @throws IOException if the evaluation directory cannot be created (among other I/O failures)
 * @throws UIMAException if building or running a pipeline fails
 */
private static void evaluateTrainedFile(String fileName) throws IOException, UIMAException {
    String trainedFile = directory + fileName;
    Path directoryPath;
    if (!singleLabelling) {
        directoryPath = Paths.get(trainedFile + "-evaluation");
    } else {
        directoryPath = Paths.get(directory, fileName + "-singleLabel-evaluation");
        // In single-label mode the evaluation reads the generated single-labeled file.
        trainedFile = generateSingleLabeledFile(trainedFile).toString();
    }

    Files.createDirectory(directoryPath);
    String outputDir = directoryPath.toString();

    // Reader that produces the JCas from the trained file; shared by both runs.
    CollectionReaderDescription reader = createReaderDescription(Conll2003AidaReader.class,
            PARAM_LANGUAGE, language,
            Conll2003AidaReader.PARAM_SINGLE_FILE, true,
            Conll2003AidaReader.PARAM_ORDER, WORD_POSITION_TYPE,
            PARAM_SOURCE_LOCATION, trainedFile,
            Conll2003AidaReader.PARAM_MANUAL_TOKENS_NER, false,
            Conll2003AidaReader.PARAM_NAMED_ENTITY_PER_TOKEN, true);

    // Run 1: mention spans.
    AnalysisEngineDescription mentionSpansWriter = createEngineDescription(
            MentionSpansEvaluationWriter.class,
            MentionSpansEvaluationWriter.PARAM_OUTPUT_FILE, outputDir + "/ManualSpanEvaluation.txt");

    System.out.println("Running mention spans evaluation");
    SimplePipeline.runPipeline(reader, manualAnnotatorPerMentionDescription,
            nerMentionAnnotatorDescription, mentionSpansWriter);

    // Run 2: tokens and mentions.
    AnalysisEngineDescription predictionsWriter = createEngineDescription(
            PredictionsWriter.class,
            PredictionsWriter.PARAM_LANGUAGE, language,
            PredictionsWriter.PARAM_MENTION_OUTPUT_FILE, outputDir + "/ConllMentionEvaluation.txt",
            PredictionsWriter.PARAM_TOKEN_OUTPUT_FILE, outputDir + "/ConllTokenEvaluation.txt",
            PredictionsWriter.PARAM_KNOW_NER, true,
            PredictionsWriter.PARAM_POSITION_TYPE, ConllEvaluation.TrainedPositionType.ORIGINAL);

    System.out.println("Running tokens and mentions evaluation");
    SimplePipeline.runPipeline(reader, manualAnnotatorPerTokenDescription, predictionsWriter);
}
 
Example #14
Source File: CollectionReaderDescription_implTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes the test descriptor to bytes, deserializes it again, and checks that the result
 * equals the original. Any exception is routed through {@code JUnitExtension.handleException}.
 */
public void testSerialization() throws Exception {
  try {
    byte[] serialized = SerializationUtils.serialize(mTestDesc);

    CollectionReaderDescription restored =
            (CollectionReaderDescription) SerializationUtils.deserialize(serialized);

    // The round trip must preserve equality.
    Assert.assertEquals(mTestDesc, restored);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
 
Example #15
Source File: ExternalResourceFactory.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the external resource dependencies on a resource specifier. The setter has to be
 * selected per sub-class; only collection reader and analysis engine descriptions are
 * handled.
 * 
 * @param aDesc
 *          the specifier to update
 * @param aDependencies
 *          the dependencies to set
 * @throws IllegalArgumentException
 *           if the sub-class passed is not supported.
 */
private static void setResourceDependencies(ResourceSpecifier aDesc,
        ExternalResourceDependency[] aDependencies) {
  if (aDesc instanceof CollectionReaderDescription) {
    CollectionReaderDescription readerDesc = (CollectionReaderDescription) aDesc;
    readerDesc.setExternalResourceDependencies(aDependencies);
    return;
  }

  if (aDesc instanceof AnalysisEngineDescription) {
    AnalysisEngineDescription engineDesc = (AnalysisEngineDescription) aDesc;
    engineDesc.setExternalResourceDependencies(aDependencies);
    return;
  }

  throw new IllegalArgumentException(
          "Resource specified cannot have external resource dependencies");
}
 
Example #16
Source File: SparkUimaUtils.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Reads documents with the NYT collection reader and writes each one that carries document
 * metadata to a sequence file, keyed by its document ID.
 * <p>
 * The writer is closed in a {@code finally} block, so it is released even when reading or
 * appending fails.
 *
 * @param params
 *          configuration parameters passed on to the collection reader description
 * @param uri
 *          location of the sequence file to create
 * @throws IOException
 *           if the writer cannot be created or written to, or if a document has no metadata
 *           instance
 */
public static void createSequenceFile(Object[] params, String uri)
    throws URISyntaxException, IOException, UIMAException, NoSuchMethodException, MissingSettingException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Path path = new Path(uri);
  Writer writer =
      SequenceFile.createWriter(
          conf, Writer.file(path),
          Writer.keyClass(Text.class),
          Writer.valueClass(SCAS.class));

  try {
    int count = 0;

    CollectionReaderDescription readerDescription = Reader.getCollectionReaderDescription(Reader.COLLECTION_FORMAT.NYT, params);
    for (JCas jCas : SimplePipelineCasPoolIterator.iteratePipeline(20, readerDescription)) {
      // Documents without metadata are skipped but still released.
      if (JCasUtil.exists(jCas, DocumentMetaData.class)) {
        ++count;
        // Get the ID.
        DocumentMetaData dmd = JCasUtil.selectSingle(jCas, DocumentMetaData.class);
        if (dmd == null) {
          throw new IOException("No Document ID for xml: " + jCas.getView("xml").getDocumentText());
        }
        Text docIdText = new Text(dmd.getDocumentId());
        SCAS scas = new SCAS(jCas.getCas());
        writer.append(docIdText, scas);
      }
      jCas.release();
    }
    logger.info("Wrote " + count + " documents to " + uri);
  } finally {
    // Always release the writer, including on failure paths.
    IOUtils.closeStream(writer);
  }
}
 
Example #17
Source File: ExternalResourceFactory.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the external resource dependencies of a resource specifier. The getter has to be
 * selected per sub-class; only collection reader and analysis engine descriptions are
 * handled.
 * 
 * @param aDesc
 *          the specifier to read from
 * @return the external resource dependencies declared on the specifier
 * @throws IllegalArgumentException
 *           if the sub-class passed is not supported.
 */
private static ExternalResourceDependency[] getResourceDependencies(
        ResourceSpecifier aDesc) {
  if (aDesc instanceof CollectionReaderDescription) {
    CollectionReaderDescription readerDesc = (CollectionReaderDescription) aDesc;
    return readerDesc.getExternalResourceDependencies();
  }

  if (aDesc instanceof AnalysisEngineDescription) {
    AnalysisEngineDescription engineDesc = (AnalysisEngineDescription) aDesc;
    return engineDesc.getExternalResourceDependencies();
  }

  throw new IllegalArgumentException(
          "Resource specified cannot have external resource dependencies");
}
 
Example #18
Source File: CpeBuilder.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces all collection readers of the CPE descriptor with a single reader built from the
 * given description, which is first materialized and then referenced by its URL.
 *
 * @param aDesc the collection reader description to install
 */
@Override
public void setReader(CollectionReaderDescription aDesc)
        throws IOException, SAXException, CpeDescriptorException {
    // Start from an empty reader list.
    cpeDesc.setAllCollectionCollectionReaders(new CpeCollectionReader[0]);

    String descriptorLocation = materializeDescriptor(aDesc).toURI().toURL().toString();
    cpeDesc.addCollectionReader(produceCollectionReader(descriptorLocation));
}
 
Example #19
Source File: MultiPageEditor.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a fresh analysis engine description, copies the implementation name and framework
 * implementation from the given collection reader description into it, and then links the
 * local processing descriptors.
 *
 * @param d the collection reader description to copy from
 * @throws ResourceInitializationException the resource initialization exception
 */
private void createAndLinkLocalProcessingDescriptorsToAe(CollectionReaderDescription d)
        throws ResourceInitializationException {
  aeDescription = UIMAFramework.getResourceSpecifierFactory().createAnalysisEngineDescription();

  String implementationName = d.getImplementationName();
  aeDescription.setAnnotatorImplementationName(implementationName);

  String frameworkImplementation = d.getFrameworkImplementation();
  aeDescription.setFrameworkImplementation(frameworkImplementation);

  linkLocalProcessingDescriptorsToAe(d);
}
 
Example #20
Source File: CpeBuilder.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a builder with the given reader and maximum processing unit thread count.
 *
 * @param aMaxProcessingUnitThreatCount the thread count handed to the corresponding setter
 * @param aDesc the collection reader description handed to {@code setReader}
 * @deprecated use default ctor and setters instead
 */
@Deprecated
public CpeBuilder(
        int aMaxProcessingUnitThreatCount,
        CollectionReaderDescription aDesc)
        throws IOException, SAXException, CpeDescriptorException {
    // The reader is installed first, then the thread count.
    this.setReader(aDesc);
    this.setMaxProcessingUnitThreatCount(aMaxProcessingUnitThreatCount);
}
 
Example #21
Source File: UimaHelpers.java    From biomedicus with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a collection reader description from an XML descriptor file.
 *
 * @param path location of the descriptor file
 * @return the parsed collection reader description
 * @throws BiomedicusException wrapping any I/O or XML validity failure
 */
public static CollectionReaderDescription loadCollectionReaderDescription(Path path)
    throws BiomedicusException {
  try {
    XMLInputSource descriptorSource = new XMLInputSource(path.toFile());
    return UIMAFramework.getXMLParser().parseCollectionReaderDescription(descriptorSource);
  } catch (IOException | InvalidXMLException e) {
    throw new BiomedicusException(e);
  }
}
 
Example #22
Source File: CollectionReaderFactory_implTest.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that producing a collection reader from a description with an unknown framework
 * implementation fails with a localized {@code UNSUPPORTED_FRAMEWORK_IMPLEMENTATION} error.
 */
public void testInvalidFrameworkImplementation() {
  CollectionReaderDescription desc = new CollectionReaderDescription_impl();
  desc.setFrameworkImplementation("foo");
  try {
    // Typed empty map instead of the raw Collections.EMPTY_MAP constant.
    ccFactory.produceResource(CollectionReader.class, desc,
            Collections.<String, Object>emptyMap());
    fail();
  } catch (ResourceInitializationException e) {
    assertNotNull(e.getMessage());
    assertFalse(e.getMessage().startsWith("EXCEPTION MESSAGE LOCALIZATION FAILED"));
    // Expected value first, so a failure message labels the two values correctly.
    assertEquals(ResourceInitializationException.UNSUPPORTED_FRAMEWORK_IMPLEMENTATION,
            e.getMessageKey());
  }
}
 
Example #23
Source File: CasDataCollectionReader_ImplBase.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Called by the framework to initialize this Collection Reader. Subclasses should generally NOT
 * override this method; they should override the zero-argument {@link #initialize()} method
 * instead and access metadata via the {@link #getProcessingResourceMetaData()} method. This
 * method is non-final only for legacy reasons.
 * 
 * @return {@code true} if the specifier is a {@code CollectionReaderDescription} and the
 *         framework initialization succeeded; {@code false} otherwise
 * @see org.apache.uima.resource.Resource#initialize(org.apache.uima.resource.ResourceSpecifier,
 *      java.util.Map)
 */
public boolean initialize(ResourceSpecifier aSpecifier, Map<String, Object> aAdditionalParams)
        throws ResourceInitializationException {
  // Only a CollectionReaderDescription is accepted; framework initialization runs next, and
  // user initialization runs only if that succeeded.
  if (aSpecifier instanceof CollectionReaderDescription
          && super.initialize(aSpecifier, aAdditionalParams)) {
    initialize();
    return true;
  }
  return false;
}
 
Example #24
Source File: CollectionReaderFactoryTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Checks auto-detection when creating a reader description: the Token, Sentence and
 * AnalyzedText types, a single type priority list, and a single sorted index on Token.
 */
@Test
public void thatCreateReaderDescriptorAutoDetectionWorks() throws Exception
{
  CollectionReaderDescription readerDesc = createReaderDescription(TestCR.class);

  String tokenType = Token.class.getName();
  String sentenceType = Sentence.class.getName();
  String analyzedTextType = AnalyzedText.class.getName();

  // Type system
  TypeSystemDescription typeSystem = createTypeSystemDescription();
  assertThat(typeSystem.getType(tokenType))
      .as("Token type auto-detection")
      .isNotNull();
  assertThat(typeSystem.getType(sentenceType))
      .as("Sentence type auto-detection")
      .isNotNull();
  assertThat(typeSystem.getType(analyzedTextType))
      .as("AnalyzedText type auto-detection")
      .isNotNull();

  // Type priorities
  TypePriorityList[] priorityLists = typePriorities.getPriorityLists();
  assertThat(priorityLists.length).isEqualTo(1);
  assertThat(priorityLists[0].getTypes())
      .as("Type priorities auto-detection")
      .containsExactly(sentenceType, analyzedTextType, tokenType);

  // Indexes declared on the reader description
  FsIndexDescription[] fsIndexes =
      readerDesc.getCollectionReaderMetaData().getFsIndexCollection().getFsIndexes();
  assertThat(fsIndexes.length).isEqualTo(1);
  assertThat(fsIndexes[0])
      .extracting(FsIndexDescription::getLabel, FsIndexDescription::getTypeName, FsIndexDescription::getKind)
      .containsExactly("Automatically Scanned Index", tokenType, FsIndexDescription.KIND_SORTED);
}
 
Example #25
Source File: CollectionReaderFactoryTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the name, version, description, copyright and vendor in the metadata of the
 * description created for {@code TestCR}.
 */
@Test
public void testResourceMetaData() throws Exception
{
  CollectionReaderDescription readerDesc =
          CollectionReaderFactory.createReaderDescription(TestCR.class);

  org.apache.uima.resource.metadata.ResourceMetaData metaData = readerDesc.getMetaData();

  assertEquals("dummy", metaData.getName());
  assertEquals("1.0", metaData.getVersion());
  assertEquals("Just a dummy", metaData.getDescription());
  assertEquals("ASL 2.0", metaData.getCopyright());
  assertEquals("uimaFIT", metaData.getVendor());
}
 
Example #26
Source File: DescriptorMakeUtil.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a collection reader descriptor, overrides its test configuration parameters, and
 * writes the result to {@code TmpCollectionReader.xml} in the CPE descriptor test directory.
 *
 * @param descFileName path of the descriptor to parse
 * @param shouldCrash whether the error-related parameters should be set as well
 * @param functionName value for the "ErrorFunction" parameter (used only if shouldCrash)
 * @param errorCount value for the "ErrorCount" parameter (used only if shouldCrash)
 * @param exceptionName value for the "ErrorException" parameter (used only if shouldCrash)
 * @param documentCount value for the "DocumentCount" parameter
 * @return the absolute path of the descriptor file that was written
 * @throws Exception if parsing or writing the descriptor fails
 */
public static String makeCollectionReader(String descFileName, boolean shouldCrash,
        String functionName, int errorCount, String exceptionName, int documentCount)
        throws Exception {

  XMLInputSource in = new XMLInputSource(descFileName);
  CollectionReaderDescription crd = UIMAFramework.getXMLParser()
          .parseCollectionReaderDescription(in);
  crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
          "DocumentCount", documentCount);
  // set the function to crash, if desired
  if (shouldCrash) {
    crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
            "ErrorFunction", functionName);
    crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
            "ErrorCount", errorCount);
    crd.getCollectionReaderMetaData().getConfigurationParameterSettings().setParameterValue(
            "ErrorException", exceptionName);
  }
  File baseDir = JUnitExtension.getFile("CpmTests" + FS + "CpeDesc");

  if (!baseDir.exists()) {
    baseDir.mkdir();
  }

  File tmpFileName = new File(baseDir, "TmpCollectionReader.xml");
  OutputStream out = new FileOutputStream(tmpFileName);
  try {
    serializeDescriptor(crd, out);
  } finally {
    // Always release the file handle, including when serialization fails.
    out.close();
  }
  return tmpFileName.getAbsolutePath();
}
 
Example #27
Source File: Conll2012FormatSupport.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the description of the CoNLL 2012 reader with constituent reading switched off.
 *
 * @param aTSD the type system description passed on to the reader description factory
 * @return the reader description
 * @throws ResourceInitializationException if the description cannot be created
 */
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    // WebAnno does not support constituents, and reading a file without them triggers an
    // NPE in DKPro Core 1.11.0, so constituent reading stays disabled.
    final boolean readConstituents = false;

    return createReaderDescription(
            Conll2012Reader.class, aTSD,
            Conll2012Reader.PARAM_READ_CONSTITUENT, readConstituents);
}
 
Example #28
Source File: XmiFormatSupport.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the description of the XMI reader in lenient mode.
 *
 * @param aTSD the type system description passed on to the reader description factory, so the
 *            reader is described against the caller's type system rather than ignoring it
 * @return the reader description
 * @throws ResourceInitializationException if the description cannot be created
 */
@Override
public CollectionReaderDescription getReaderDescription(TypeSystemDescription aTSD)
    throws ResourceInitializationException
{
    return createReaderDescription(XmiReader.class, aTSD,
            XmiReader.PARAM_LENIENT, true);
}
 
Example #29
Source File: AggregateWithReaderTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates running a collection reader as a delegate of an aggregate engine. The output
 * CASes can then be accessed directly from the pipeline, with no need to write the data to
 * files.
 */
@Test
public void demoAggregateWithReader() throws UIMAException {
  ResourceSpecifierFactory specFactory = UIMAFramework.getResourceSpecifierFactory();

  // Delegate 1: the reader.
  CollectionReaderDescription readerDesc = specFactory.createCollectionReaderDescription();
  readerDesc.getMetaData().setName("reader");
  readerDesc.setImplementationName(SimpleReader.class.getName());

  // Delegate 2: a primitive analyzer.
  AnalysisEngineDescription analyzerDesc = specFactory.createAnalysisEngineDescription();
  analyzerDesc.getMetaData().setName("analyzer");
  analyzerDesc.setPrimitive(true);
  analyzerDesc.setImplementationName(SimpleAnalyzer.class.getName());

  // Fixed flow: reader first, then analyzer.
  FixedFlow readerThenAnalyzer = specFactory.createFixedFlow();
  readerThenAnalyzer.setFixedFlow(new String[] { "reader", "analyzer" });

  // The aggregate outputs new CASes and must not be deployed multiple times.
  AnalysisEngineDescription aggregateDesc = specFactory.createAnalysisEngineDescription();
  aggregateDesc.getMetaData().setName("aggregate");
  aggregateDesc.getAnalysisEngineMetaData().setFlowConstraints(readerThenAnalyzer);
  aggregateDesc.getAnalysisEngineMetaData().getOperationalProperties().setOutputsNewCASes(true);
  aggregateDesc.getAnalysisEngineMetaData().getOperationalProperties()
          .setMultipleDeploymentAllowed(false);
  aggregateDesc.setPrimitive(false);
  aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("reader", readerDesc);
  aggregateDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("analyzer", analyzerDesc);

  AnalysisEngine engine = UIMAFramework.produceAnalysisEngine(aggregateDesc);
  for (CasIterator produced = engine.processAndOutputNewCASes(engine.newCAS());
          produced.hasNext();) {
    CAS outputCas = produced.next();
    System.out.printf("[%s] is [%s]%n", outputCas.getDocumentText(),
            outputCas.getDocumentLanguage());
  }
}
 
Example #30
Source File: TeiReaderTest.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the TEI files under {@code classpath:/local/} and checks, for every document, the
 * annotation counts and the text of the first sentence. Ignored because no TEI data is
 * available as open source.
 */
@Test
@Ignore("No TEI yet to opensource ")
public void testTeiReader()
    throws Exception
{
    CollectionReaderDescription teiReader = createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, "classpath:/local/",
            TeiReader.PARAM_PATTERNS, new String[] { "[+]*.xml" });

    String expectedFirstSentence = "70 I DAG.";

    for (JCas document : new JCasIterable(teiReader)) {
        DocumentMetaData metaData = DocumentMetaData.get(document);
        String documentText = document.getDocumentText();
        System.out.printf("%s - %d%n", metaData.getDocumentId(), documentText.length());
        System.out.println(document.getDocumentLanguage());

        // Expected annotation counts per document.
        assertEquals(2235, JCasUtil.select(document, Token.class).size());
        assertEquals(745, JCasUtil.select(document, POS.class).size());
        assertEquals(745, JCasUtil.select(document, Lemma.class).size());
        assertEquals(0, JCasUtil.select(document, NamedEntity.class).size());
        assertEquals(30, JCasUtil.select(document, Sentence.class).size());

        assertEquals(expectedFirstSentence,
                JCasUtil.select(document, Sentence.class).iterator().next().getCoveredText());
    }
}