Java Code Examples for org.apache.uima.fit.factory.AnalysisEngineFactory#createEngine()

The following examples show how to use org.apache.uima.fit.factory.AnalysisEngineFactory#createEngine(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: BlacklistTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the {@code Blacklist} annotator, configured with a blacklist of {@code LONDON} and
 * {@code UNITED_KINGDOM} and with entity-value checking switched on, over the test document and
 * verifies the outcome through {@code assertCorrect(1, 1, 0)}.
 *
 * @throws Exception if the engine cannot be created or processing fails
 */
@Test
public void testBlacklistEntityValue() throws Exception {
  AnalysisEngine rneAE =
      AnalysisEngineFactory.createEngine(
          Blacklist.class,
          Blacklist.PARAM_BLACKLIST,
          new String[] {LONDON, UNITED_KINGDOM},
          Blacklist.PARAM_CHECK_ENTITY_VALUE,
          true);
  try {
    createDocument(jCas);

    rneAE.process(jCas);

    assertCorrect(1, 1, 0);
  } finally {
    // Release the engine even when processing or the assertion fails.
    rneAE.destroy();
  }
}
 
Example 2
Source File: ListTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that when the search term occurs several times in the document but a {@code Text}
 * annotation covers only part of it, exactly one {@code Location} is produced and it begins
 * after offset 10.
 *
 * @throws Exception if the engine cannot be created or processing fails
 */
@Test
public void testmultipleHitsWithText() throws Exception {

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
  try {
    // the same search term appears multiple times in text...
    jCas.setDocumentText("Hello world, and hello world again.");
    // but then subset using a Text annotation
    new Text(jCas, 10, jCas.getDocumentText().length()).addToIndexes();

    ae.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, Location.class).size());
    Location l = JCasUtil.selectByIndex(jCas, Location.class, 0);
    assertEquals(WORLD, l.getValue());
    assertEquals(WORLD, l.getCoveredText());
    assertTrue(l.getBegin() > 10);
  } finally {
    // Release the engine even when processing or an assertion fails.
    ae.destroy();
  }
}
 
Example 3
Source File: FixedExpressionSpotterSpec.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code FixedExpressionSpotter} engine with a maximum fixed-expression size of 5 and
 * the fixed-expression resource bound to its description.
 *
 * @param removeWordAnnotationFromCas value passed for {@code REMOVE_WORD_ANNOTATIONS_FROM_CAS}
 * @param removeTermOccAnnotationFromCas value passed for {@code REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS}
 * @return the instantiated analysis engine
 * @throws Exception if the description, the resource or the engine cannot be created
 */
private AnalysisEngine makeAE(boolean removeWordAnnotationFromCas, boolean removeTermOccAnnotationFromCas) throws Exception {
	AnalysisEngineDescription spotterDescription = AnalysisEngineFactory.createEngineDescription(
			FixedExpressionSpotter.class,
			FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
			FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, removeWordAnnotationFromCas,
			FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, removeTermOccAnnotationFromCas
		);

	// The fixed-expression resource, bound to the spotter description before instantiation.
	ExternalResourceDescription resourceDescription = ExternalResourceFactory.createExternalResourceDescription(
			FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
			FixedExpressionResource.class,
			"file:fr/univnantes/termsuite/test/resources/french-fixed-expressions.txt"
	);
	ExternalResourceFactory.bindResource(spotterDescription, resourceDescription);

	return AnalysisEngineFactory.createEngine(spotterDescription);
}
 
Example 4
Source File: StructuralHtmlTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@code StructuralHtml} consumer with output data enabled over a one-word document and
 * checks that {@code test.txt.html} is written to the output folder with the expected markup
 * (compared after stripping whitespace).
 *
 * @throws Exception if the engine cannot be created, processing fails or the file cannot be read
 */
@Test
public void testOutputData() throws Exception {
  final AnalysisEngine consumer =
      AnalysisEngineFactory.createEngine(
          StructuralHtml.class,
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath(),
          StructuralHtml.PARAM_OUTPUT_DATA,
          true);
  try {
    final DocumentAnnotation da = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
    da.setSourceUri("test.txt");

    jCas.setDocumentText("Example");

    final Document d = new Document(jCas);
    d.setBegin(0);
    d.setEnd("Example".length());
    d.addToIndexes();

    consumer.process(jCas);

    final File f = new File(outputFolder, "test.txt.html");
    assertTrue(f.exists());

    // JUnit's contract is (expected, actual); the original order produced a misleading
    // failure message. Whitespace is removed from both sides before comparing.
    assertEquals(
        EXPECTED_DATA.replaceAll("\\s*", ""),
        Jsoup.parse(f, "UTF-8").html().replaceAll("\\s*", ""));
  } finally {
    // Release the consumer even when processing or an assertion fails.
    consumer.destroy();
  }
}
 
Example 5
Source File: CorefCapitalisationAndApostropheTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * With {@code mergeReferents} enabled, two people that start out with separate reference
 * targets must end up sharing the single reference target that remains after processing.
 *
 * @throws Exception if the engine cannot be created or processing fails
 */
@Test
public void testExistingReferentsMerge() throws Exception {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          CorefCapitalisationAndApostrophe.class, "mergeReferents", true);

  jCas.setDocumentText(TEXT);

  // Each person initially points at its own reference target.
  ReferenceTarget firstTarget = Annotations.createReferenceTarget(jCas);
  ReferenceTarget secondTarget = Annotations.createReferenceTarget(jCas);

  Person firstPerson = Annotations.createPerson(jCas, 0, 5, JAMES);
  firstPerson.setReferent(firstTarget);
  Person secondPerson = Annotations.createPerson(jCas, 22, 27, JAMES_UC);
  secondPerson.setReferent(secondTarget);

  engine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, ReferenceTarget.class).size());

  ReferenceTarget mergedTarget = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
  Person firstAfter = JCasUtil.selectByIndex(jCas, Person.class, 0);
  Person secondAfter = JCasUtil.selectByIndex(jCas, Person.class, 1);

  assertEquals(mergedTarget, firstAfter.getReferent());
  assertEquals(mergedTarget, secondAfter.getReferent());
}
 
Example 6
Source File: PosUimaTokenizer.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an engine from the combined sentence, tokenizer, PoS-tagger ("en") and stemmer
 * ("English") descriptions.
 *
 * @return the instantiated analysis engine
 * @throws RuntimeException wrapping any exception raised while building the engine
 */
public static AnalysisEngine defaultAnalysisEngine() {
    AnalysisEngine engine;
    try {
        engine = AnalysisEngineFactory.createEngine(
                        AnalysisEngineFactory.createEngineDescription(
                                        SentenceAnnotator.getDescription(),
                                        TokenizerAnnotator.getDescription(),
                                        PoStagger.getDescription("en"),
                                        StemmerAnnotator.getDescription("English")));
    } catch (Exception cause) {
        // Surface any failure as unchecked, keeping the original cause.
        throw new RuntimeException(cause);
    }
    return engine;
}
 
Example 7
Source File: ConfigurationParameterInitializerTest.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * A parameter explicitly set to {@code null} is treated the same as a parameter that was never
 * set. For a mandatory parameter this means an exception must be thrown.
 */
@Test(expected = ResourceInitializationException.class)
public void testMandatoryParameterSetToNull() throws Exception {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          DefaultValueAE2.class, DefaultValueAE2.PARAM_COLOR, null);
  // Initialize a fresh component instance against the context of the engine created above.
  DefaultValueAE2 component = new DefaultValueAE2();
  component.initialize(engine.getUimaContext());
}
 
Example 8
Source File: GetStartedQuickPipeline.java    From uima-uimafit with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an empty JCas and runs {@code GetStartedQuickAE} over it with {@code PARAM_STRING}
 * set to "uimaFIT".
 *
 * @param args command-line arguments (not used)
 * @throws UIMAException if the JCas or the engine cannot be created, or processing fails
 */
public static void main(String[] args) throws UIMAException {
  // Type systems listed in META-INF/org.apache.uima.fit/types.txt are picked up automatically
  // by uimaFIT.

  // uimaFIT ships no collection readers, so a JCas is instantiated directly and handed to the
  // analysis engine.
  JCas cas = JCasFactory.createJCas();

  // Configure the engine with the value "uimaFIT" for PARAM_STRING ("stringParam").
  AnalysisEngine quickEngine =
      AnalysisEngineFactory.createEngine(
          GetStartedQuickAE.class, GetStartedQuickAE.PARAM_STRING, "uimaFIT");

  // Run the engine; a special greeting should show up on the console.
  quickEngine.process(cas);
}
 
Example 9
Source File: HmsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@code OpenNLP} engine used by the tests and stores it in {@code languageAE}. The
 * engine is given four named {@code SharedOpenNLPModel} resources: tokens, sentences, posTags
 * and phraseChunks.
 *
 * @throws UIMAException if a description or the engine cannot be created
 */
@Before
public void before() throws UIMAException {
  ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  // Describe the annotator with its resources and instantiate it in one step.
  languageAE =
      AnalysisEngineFactory.createEngine(
          AnalysisEngineFactory.createEngineDescription(
              OpenNLP.class,
              "tokens",
              tokenModel,
              "sentences",
              sentenceModel,
              "posTags",
              posModel,
              "phraseChunks",
              chunkModel));
}
 
Example 10
Source File: PosUimaTokenizer.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an engine from the combined sentence, tokenizer, PoS-tagger ("en") and stemmer
 * ("English") descriptions.
 *
 * @return the instantiated analysis engine
 * @throws RuntimeException wrapping any exception raised while building the engine
 */
public static AnalysisEngine defaultAnalysisEngine() {
    AnalysisEngine engine;
    try {
        engine = AnalysisEngineFactory.createEngine(
                        AnalysisEngineFactory.createEngineDescription(
                                        SentenceAnnotator.getDescription(),
                                        TokenizerAnnotator.getDescription(),
                                        PoStagger.getDescription("en"),
                                        StemmerAnnotator.getDescription("English")));
    } catch (Exception cause) {
        // Surface any failure as unchecked, keeping the original cause.
        throw new RuntimeException(cause);
    }
    return engine;
}
 
Example 11
Source File: NPElementTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the {@code OpenNLP} engine used by the tests and stores it in {@code languageAE}. The
 * engine is given four named {@code SharedOpenNLPModel} resources: tokens, sentences, posTags
 * and phraseChunks.
 *
 * @throws UIMAException if a description or the engine cannot be created
 */
@Before
public void before() throws UIMAException {
  ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  // Describe the annotator with its resources and instantiate it in one step.
  languageAE =
      AnalysisEngineFactory.createEngine(
          AnalysisEngineFactory.createEngineDescription(
              OpenNLP.class,
              "tokens",
              tokenModel,
              "sentences",
              sentenceModel,
              "posTags",
              posModel,
              "phraseChunks",
              chunkModel));
}
 
Example 12
Source File: ActiveMQTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * One-time fixture: builds the {@code ActiveMQ} engine backed by a {@code SharedActiveMQResource},
 * keeps a handle on that resource and subscribes a consumer to the {@code ENDPOINT} topic.
 *
 * @throws UIMAException if a description or the engine cannot be created
 * @throws JMSException if the topic or its consumer cannot be created
 */
@BeforeClass
public static void setupClass() throws UIMAException, JMSException {
  // Name/value pairs configuring the shared resource
  Object[] resourceConfig =
      new String[] {
        SharedActiveMQResource.PARAM_PROTOCOL,
        PROTOCOL_VALUE,
        SharedActiveMQResource.PARAM_HOST,
        HOST_VALUE,
        SharedActiveMQResource.PARAM_BROKERARGS,
        BROKERARGS_VALUE
      };

  // Descriptors for the resource and for the annotator that uses it
  ExternalResourceDescription resourceDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          ACTIVEMQ, SharedActiveMQResource.class, resourceConfig);
  AnalysisEngineDescription engineDescription =
      AnalysisEngineFactory.createEngineDescription(
          ActiveMQ.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          ACTIVEMQ,
          resourceDescription,
          ActiveMQ.PARAM_ENDPOINT,
          ENDPOINT);

  // Instantiate the annotator
  ae = AnalysisEngineFactory.createEngine(engineDescription);

  // Keep the resource so the tests can inspect what the annotator outputs
  resource = (SharedActiveMQResource) ae.getUimaContext().getResourceObject(ACTIVEMQ);
  // Listen on the topic the annotator will publish to
  Session jmsSession = resource.getSession();
  topicConsumer = jmsSession.createConsumer(jmsSession.createTopic(ENDPOINT));
}
 
Example 13
Source File: PostcodeTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Text containing "JP4 0JQ" must not yield any {@code Coordinate} annotation.
 *
 * @throws Exception if the engine cannot be created or processing fails
 */
@Test
public void testInvalidPostcode() throws Exception {
  AnalysisEngine postcodeEngine = AnalysisEngineFactory.createEngine(Postcode.class);

  jCas.setDocumentText("Porton Down is not located at JP4 0JQ.");
  postcodeEngine.process(jCas);

  assertAnnotations(0, Coordinate.class);
}
 
Example 14
Source File: RemoveNestedLocationsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Two locations created with a {@code null} last argument, one spanning 6-12 and one spanning
 * 0-12: after processing only the 0-12 location must remain.
 *
 * @throws Exception if the engine cannot be created or processing fails
 */
@Test
public void testNoGeoJson() throws Exception {
  AnalysisEngine engine = AnalysisEngineFactory.createEngine(RemoveNestedLocations.class);

  jCas.setDocumentText(NORTH_LONDON);

  Annotations.createLocation(jCas, 6, 12, LONDON, null);
  Location outerLocation = Annotations.createLocation(jCas, 0, 12, NORTH_LONDON, null);

  engine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Location.class).size());
  assertEquals(outerLocation, JCasUtil.selectByIndex(jCas, Location.class, 0));
}
 
Example 15
Source File: AbstractRegexNPAnnotatorTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@code OpenNLP} engine followed by {@code TestAnnotator} over an upper-case sentence
 * and expects exactly one {@code Person} whose value is "JOHN SMITH".
 *
 * @throws Exception if an engine cannot be created or processing fails
 */
@Test
public void testChunks() throws Exception {
  ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  // Language engine with the four shared model resources
  AnalysisEngineDescription openNlpDescription =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          "tokens",
          tokenModel,
          "sentences",
          sentenceModel,
          "posTags",
          posModel,
          "phraseChunks",
          chunkModel);
  AnalysisEngine openNlpEngine = AnalysisEngineFactory.createEngine(openNlpDescription);

  // Annotator under test
  AnalysisEngine annotatorEngine = AnalysisEngineFactory.createEngine(TestAnnotator.class);

  jCas.setDocumentText("PERSON JOHN SMITH WAS SEEN ENTERING THE WAREHOUSE");
  openNlpEngine.process(jCas);
  annotatorEngine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());
  Person found = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals("JOHN SMITH", found.getValue());
}
 
Example 16
Source File: RakeKeywordsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code RakeKeywords} with a maximum of 3 keywords and buzzwords disabled, then checks
 * that exactly the three expected keywords are stored in the document metadata.
 *
 * @throws ResourceInitializationException if a description or the engine cannot be created
 * @throws AnalysisEngineProcessException if processing the document fails
 */
@Test
public void testMaxNumber()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          3,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
  try {
    jCas.setDocumentText(
        "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
    ae.process(jCas);

    JCasMetadata metadata = new JCasMetadata(jCas);
    Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

    assertEquals(3, keywords.size());
    assertTrue(keywords.contains("minimal generating sets"));
    assertTrue(keywords.contains("linear diophantine equations"));
    assertTrue(keywords.contains("minimal supporting set"));
  } finally {
    // Release the engine even when processing or an assertion fails.
    ae.destroy();
  }
}
 
Example 17
Source File: WordDistributionDocumentSummaryTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Supplies the engines for this test: the {@code OpenNLP} engine first, followed by the
 * {@code WordDistributionDocumentSummary} engine configured with a desired summary character
 * count of 300 and a frequency threshold of 1.
 *
 * @return the two engines, OpenNLP first
 * @throws ResourceInitializationException if a description or an engine cannot be created
 */
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

  ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  AnalysisEngineDescription summaryDescription =
      AnalysisEngineFactory.createEngineDescription(
          WordDistributionDocumentSummary.class,
          DESIRED_SUMMARY_CHARACTER_COUNT,
          300,
          FREQUENCY_THRESHOLD,
          1);

  AnalysisEngineDescription openNlpDescription =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          "tokens",
          tokenModel,
          "sentences",
          sentenceModel,
          "posTags",
          posModel,
          "phraseChunks",
          chunkModel);

  // Array elements are evaluated left to right, so the OpenNLP engine is still created first.
  return new AnalysisEngine[] {
    AnalysisEngineFactory.createEngine(openNlpDescription),
    AnalysisEngineFactory.createEngine(summaryDescription)
  };
}
 
Example 18
Source File: BlueAnnotationViewerAnnotator.java    From bluima with Apache License 2.0 4 votes vote down vote up
/**
 * Instantiates an analysis engine from whatever {@code getAED()} returns.
 *
 * @return the instantiated analysis engine
 * @throws FileNotFoundException declared by this method; presumably raised by {@code getAED()}
 * @throws ResourceInitializationException if the engine cannot be created
 */
public static AnalysisEngine getAE() throws FileNotFoundException,
        ResourceInitializationException {
    AnalysisEngine engine = AnalysisEngineFactory.createEngine(getAED());
    return engine;
}
 
Example 19
Source File: XmiTest.java    From uima-uimafit with Apache License 2.0 3 votes vote down vote up
/**
 * Tests Annotator3 without first running Annotator1 and Annotator2. Instead, the JCas is loaded
 * from an XMI file that already contains the token annotations created by Annotator1 and the
 * pos tags added by Annotator2. This is useful because both Annotator1 and Annotator2 do a
 * pretty poor job at their tasks, so their behavior may well change in future versions, whereas
 * Annotator3 does a perfectly fine job and its tests should not have to change just because the
 * behavior of the other two does. The alternative of setting up all annotations required by
 * Annotator3 by hand can be tedious, time consuming, error prone, and results in a lot of code.
 * <p>
 * The XMI file is generated once by running {@link #main(String[])}. Hopefully, it will not be
 * necessary to regenerate the XMI file often.
 */
@Test
public void testWithXmi() throws Exception {
  jCas =
      JCasFactory.createJCas(
          "src/test/resources/org/apache/uima/fit/examples/xmi/1.xmi", typeSystemDescription);
  AnalysisEngine annotator3 =
      AnalysisEngineFactory.createEngine(Annotator3.class, typeSystemDescription);
  annotator3.process(jCas);
  Sentence firstSentence = JCasUtil.selectByIndex(jCas, Sentence.class, 0);
  assertEquals("metnetpetsetvetwetyet", firstSentence.getCoveredText());
}
 
Example 20
Source File: AbstractAnnotatorTest.java    From baleen with Apache License 2.0 2 votes vote down vote up
/**
 * Get an analysis engine for {@code annotatorClass}, created with the type system from
 * {@code TypeSystemSingleton} and the supplied configuration.
 *
 * @param args name-value pairs
 * @return the analysis engine created by {@code AnalysisEngineFactory}
 * @throws ResourceInitializationException if the engine cannot be created
 */
protected AnalysisEngine getAnalysisEngine(Object... args)
    throws ResourceInitializationException {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          annotatorClass, TypeSystemSingleton.getTypeSystemDescriptionInstance(), args);
  return engine;
}