org.apache.uima.fit.factory.AnalysisEngineFactory Java Examples

The following examples show how to use org.apache.uima.fit.factory.AnalysisEngineFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Html5Test.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the Html5 consumer over a document whose Person annotation spans a blank line and checks
 * that the written HTML file contains the entity text, line breaks included, inside one span.
 */
@Test
public void testLineBreak() throws UIMAException, IOException {
  AnalysisEngine html5Engine =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath());

  // The output file asserted on below is expected to be named after this source URI.
  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri("multiline.txt");

  jCas.setDocumentText("His name was James\n\nBond.");
  // Offsets 13-24 cover "James\n\nBond", so the entity straddles the blank line.
  Person person = new Person(jCas, 13, 24);
  person.addToIndexes();

  html5Engine.process(jCas);

  File htmlFile = new File(outputFolder, "multiline.txt.html");
  assertTrue(htmlFile.exists());

  String html = Files.asCharSource(htmlFile, StandardCharsets.UTF_8).read();
  assertTrue(html.contains("data-referent=\"\">James\n\nBond</span>"));
}
 
Example #2
Source File: BlacklistTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the Blacklist annotator, configured with two blacklisted strings and entity-value
 * checking switched on, over the test document and verifies the outcome via assertCorrect.
 */
@Test
public void testBlacklistEntityValue() throws Exception {
  String[] blacklistedValues = {LONDON, UNITED_KINGDOM};
  AnalysisEngine blacklistEngine =
      AnalysisEngineFactory.createEngine(
          Blacklist.class,
          Blacklist.PARAM_BLACKLIST,
          blacklistedValues,
          Blacklist.PARAM_CHECK_ENTITY_VALUE,
          true);
  createDocument(jCas);

  blacklistEngine.process(jCas);

  assertCorrect(1, 1, 0);

  blacklistEngine.destroy();
}
 
Example #3
Source File: SimplePipelineTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a test XMI document into a locally created CAS and runs three annotator descriptions
 * over it as a single pipeline.
 */
@Test
public void test1() throws UIMAException, IOException {
  // The CAS is built locally as a workaround for UIMA-5097: without it this test may fail when
  // run in Eclipse or in any other setup where one JVM is re-used for several tests.
  TypeSystemDescription localTypeSystem = TypeSystemDescriptionFactory.createTypeSystemDescription();
  String[] prioritizedTypes = {
      "org.apache.uima.fit.type.Sentence", "org.apache.uima.fit.type.AnalyzedText",
      "org.apache.uima.fit.type.Token" };
  TypePriorities priorities = TypePrioritiesFactory.createTypePriorities(prioritizedTypes);
  JCas localJCas = CasCreationUtils.createCas(localTypeSystem, priorities, null).getJCas();

  File xmiFile = new File("src/test/resources/data/docs/test.xmi");
  CasIOUtil.readJCas(localJCas, xmiFile);

  // NOTE(review): typeSystemDescription is not declared in this method (presumably a fixture
  // field); it is a different variable from the local type system created above.
  AnalysisEngineDescription first = AnalysisEngineFactory.createEngineDescription(
          Annotator1.class, typeSystemDescription);
  AnalysisEngineDescription second = AnalysisEngineFactory.createEngineDescription(
          Annotator2.class, typeSystemDescription);
  AnalysisEngineDescription third = AnalysisEngineFactory.createEngineDescription(
          Annotator3.class, typeSystemDescription);
  SimplePipeline.runPipeline(localJCas, first, second, third);
}
 
Example #4
Source File: Step0bTextSegmenterA.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the shared tokenizing pipeline description, creating it on first use.
 * <p>
 * The aggregate is built from, in order, a {@code ParagraphSplitter} (split pattern
 * {@code SINGLE_LINE_BREAKS_PATTERN}), an {@code ArkTweetTokenizerFixed} and a
 * {@code StanfordSegmenter} that does not write tokens and uses {@code Paragraph} as its zone type.
 *
 * @return the cached aggregate engine description
 * @throws IOException if the description cannot be created; the original
 *                     {@link ResourceInitializationException} is attached as the cause
 */
private static AnalysisEngineDescription getPipeline()
        throws IOException
{
    if (pipelineSingleton == null) {
        try {
            pipelineSingleton = AnalysisEngineFactory.createEngineDescription(
                    AnalysisEngineFactory.createEngineDescription(ParagraphSplitter.class,
                            ParagraphSplitter.PARAM_SPLIT_PATTERN,
                            ParagraphSplitter.SINGLE_LINE_BREAKS_PATTERN),
                    AnalysisEngineFactory.createEngineDescription(ArkTweetTokenizerFixed.class),
                    AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
                            StanfordSegmenter.PARAM_WRITE_TOKEN, false,
                            StanfordSegmenter.PARAM_ZONE_TYPES,
                            Paragraph.class.getCanonicalName()));
        }
        catch (ResourceInitializationException e) {
            // Keep the root cause: a bare IOException would hide why initialization failed.
            throw new IOException("Failed to create the tokenizing pipeline description", e);
        }
    }

    return pipelineSingleton;
}
 
Example #5
Source File: AbstractBaleenFileConsumerTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the test file consumer without passing a base path parameter, processes a document
 * whose source URI is FILENAME, and checks that a file exists at that path afterwards.
 */
@Test
public void testNullBasePath() throws Exception {
  AnalysisEngine fileConsumer =
      AnalysisEngineFactory.createEngine(
          TestFileConsumer.class, TypeSystemSingleton.getTypeSystemDescriptionInstance());

  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri(FILENAME);

  fileConsumer.process(jCas);

  File written = new File(FILENAME);
  assertTrue(written.exists());

  // Remove the file produced by the run.
  written.delete();
}
 
Example #6
Source File: CorefCapitalisationAndApostropheTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Processes two Person annotations, the first of which has no value set, and checks that both
 * end up pointing at the single ReferenceTarget present after processing.
 */
@Test
public void testMissingValue() throws Exception {
  AnalysisEngine corefEngine =
      AnalysisEngineFactory.createEngine(CorefCapitalisationAndApostrophe.class);

  jCas.setDocumentText(TEXT);

  // First person: only the span is set, no value.
  Person valueless = new Person(jCas);
  valueless.setBegin(0);
  valueless.setEnd(5);
  valueless.addToIndexes();

  Annotations.createPerson(jCas, 22, 27, JAMES_UC);

  corefEngine.process(jCas);

  int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
  assertEquals(1, targetCount);

  ReferenceTarget sharedTarget = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
  Person firstPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
  Person secondPerson = JCasUtil.selectByIndex(jCas, Person.class, 1);

  assertEquals(sharedTarget, firstPerson.getReferent());
  assertEquals(sharedTarget, secondPerson.getReferent());
}
 
Example #7
Source File: EntityCountTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Expects creation of the EntityCount engine to throw when its output file is a read-only
 * temporary file.
 */
@Test
public void testEntityCountOutputReadOnly() throws Exception {
  File readOnlyOutput = Files.createTempFile("baleen-entitycount", ".tsv").toFile();
  readOnlyOutput.setReadOnly();

  try {
    AnalysisEngineFactory.createEngine(
        EntityCount.class,
        TypeSystemSingleton.getTypeSystemDescriptionInstance(),
        OUTPUT_FILE,
        readOnlyOutput.getPath());
    fail("Expected exception not thrown");
  } catch (Exception expected) {
    // An exception here is the outcome the test is looking for, so nothing to do.
  }

  readOnlyOutput.delete();
}
 
Example #8
Source File: PrintTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes a single Relation linking a Person to a Location and runs the Relations engine over
 * the CAS. The test makes no assertions; it passes as long as the pipeline does not throw.
 */
@Test
public void testRelations() throws UIMAException {

  Person source = new Person(jCas);
  source.setValue("source");
  Location target = new Location(jCas);
  target.setValue("target");

  // Only the relation itself is added to the indexes.
  Relation relation = new Relation(jCas);
  relation.setSource(source);
  relation.setTarget(target);
  relation.setRelationshipType("check");
  relation.addToIndexes();

  SimplePipeline.runPipeline(jCas, AnalysisEngineFactory.createEngine(Relations.class));
}
 
Example #9
Source File: AbstractBaleenFileConsumerTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Processes a document that has text but no source URI set, and checks that the document text
 * was written to a hash-named ".txt" file inside the configured base directory.
 */
@Test
public void testNoSource() throws Exception {
  File baseDirectory = Files.createTempDir();

  AnalysisEngine fileConsumer =
      AnalysisEngineFactory.createEngine(
          TestFileConsumer.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          BASE_PATH,
          baseDirectory.getPath(),
          "extension",
          "txt");

  jCas.setDocumentText(TEXT);

  fileConsumer.process(jCas);

  // The expected name is the SHA-256 digest of the empty string; presumably it is derived from
  // the absent source URI (confirm against the consumer implementation).
  File expectedFile =
      new File(
          baseDirectory, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.txt");
  String written = FileUtils.file2String(expectedFile);
  assertEquals(TEXT, written);
}
 
Example #10
Source File: CorefBracketsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Gives the Coordinate an existing ReferenceTarget before processing and checks that, after
 * CorefBrackets has run, the Location and the Coordinate share the one ReferenceTarget present.
 */
@Test
public void testExistingCoordReferent() throws Exception {
  AnalysisEngine bracketsEngine = AnalysisEngineFactory.createEngine(CorefBrackets.class);

  jCas.setDocumentText(LOC_TEXT);

  ReferenceTarget existingTarget = Annotations.createReferenceTarget(jCas);

  Annotations.createLocation(jCas, 0, 9, SOMEWHERE, null);
  Coordinate seededCoordinate = Annotations.createCoordinate(jCas, 11, 19, MRGS);
  seededCoordinate.setReferent(existingTarget);

  bracketsEngine.process(jCas);

  int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
  assertEquals(1, targetCount);

  ReferenceTarget sharedTarget = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
  Location location = JCasUtil.selectByIndex(jCas, Location.class, 0);
  Coordinate coordinate = JCasUtil.selectByIndex(jCas, Coordinate.class, 0);

  assertEquals(sharedTarget, location.getReferent());
  assertEquals(sharedTarget, coordinate.getReferent());
  assertEquals(coordinate.getReferent(), location.getReferent());
}
 
Example #11
Source File: ListTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the List annotator produces one Location per occurrence when the text contains
 * "world" twice.
 */
@Test
public void testmultipleHits() throws Exception {

  AnalysisEngineDescription listDescription =
      AnalysisEngineFactory.createEngineDescription(
          List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION);

  AnalysisEngine listEngine = AnalysisEngineFactory.createEngine(listDescription);

  // "world" occurs twice in this text.
  jCas.setDocumentText("Hello world, and hello world again.");

  listEngine.process(jCas);

  int locationCount = JCasUtil.select(jCas, Location.class).size();
  assertEquals(2, locationCount);

  Location firstLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(WORLD, firstLocation.getValue());
  assertEquals(WORLD, firstLocation.getCoveredText());

  listEngine.destroy();
}
 
Example #12
Source File: TemporalElasticsearchTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Starts an embedded Elasticsearch instance and creates the TemporalElasticsearch engine under
 * test, bound to a shared resource that points at the embedded instance's port and cluster.
 */
@Before
public void setUp() throws Exception {
  elasticsearch = new EmbeddedElasticsearch5();

  // Shared resource configured with the embedded instance's transport port and cluster name.
  ExternalResourceDescription elasticsearchResource =
      ExternalResourceFactory.createNamedResourceDescription(
          RESOURCE_KEY,
          SharedElasticsearchResource.class,
          PARAM_PORT,
          Integer.toString(elasticsearch.getTransportPort()),
          PARAM_CLUSTER,
          elasticsearch.getClusterName());

  AnalysisEngineDescription consumerDescription =
      AnalysisEngineFactory.createEngineDescription(
          TemporalElasticsearch.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          RESOURCE_KEY,
          elasticsearchResource,
          PARAM_INDEX,
          TEMPORAL_INDEX);

  ae = AnalysisEngineFactory.createEngine(consumerDescription);
}
 
Example #13
Source File: CasDumpWriterTest.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Dumps a CAS loaded from the test XMI file with CasDumpWriter and compares the trimmed output,
 * with CRLF line endings converted to LF, against the trimmed reference dump.
 */
@Test
public void test() throws Exception {
  File dumpFile = new File(folder.getRoot(), "dump-output.txt");

  AnalysisEngine dumpWriter = AnalysisEngineFactory.createEngine(CasDumpWriter.class,
          CasDumpWriter.PARAM_OUTPUT_FILE, dumpFile.getPath());
  JCas jcas = dumpWriter.newJCas();
  File xmiFile = new File("src/test/resources/data/docs/test.xmi");
  CasIOUtil.readJCas(jcas, xmiFile);
  dumpWriter.process(jcas);
  assertTrue(dumpFile.exists());

  File referenceFile = new File("src/test/resources/data/reference/test.xmi.dump");
  String expected = readFileToString(referenceFile, "UTF-8").trim();
  // Only the actual output has its line endings converted; the reference is used as read.
  String actual = readFileToString(dumpFile, "UTF-8").trim().replace("\r\n", "\n");

  assertEquals(expected, actual);
}
 
Example #14
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the description of the engine that spots fixed expressions in the CAS and
 * creates a {@link FixedExpression} annotation whenever one is found.
 * 
 * @param resourceConfig
 *            the configuration used to resolve the fixed-expression resource URL
 * @param lang
 *            the language used to resolve the fixed-expression resource URL
 * @return the {@link FixedExpressionSpotter} description with its fixed-expression
 *         resource bound
 * @throws PreparationPipelineException
 *             wrapping any exception raised while the description is built
 */
public static AnalysisEngineDescription createFixedExpressionSpotterAEDesc(ResourceConfig resourceConfig, Lang lang)  {
	try {
		AnalysisEngineDescription spotterDesc = AnalysisEngineFactory.createEngineDescription(
				FixedExpressionSpotter.class,
				FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
				FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, false,
				FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, true
			);

		// Resource backed by the fixed-expression file for the requested language.
		ExternalResourceDescription fixedExpressions = ExternalResourceFactory.createExternalResourceDescription(
				FixedExpressionResource.class,
				getResourceURL(resourceConfig, ResourceType.FIXED_EXPRESSIONS, lang));

		ExternalResourceFactory.bindResource(
				spotterDesc,
				FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
				fixedExpressions
			);

		return spotterDesc;
	} catch (Exception cause) {
		throw new PreparationPipelineException(cause);
	}
}
 
Example #15
Source File: EntityCountTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Points EntityCount at an output file inside a fresh temporary directory and checks the
 * trimmed content of that file after one document has been processed.
 */
@Test
public void testEntityCountOutputNewFile() throws Exception {
  File tempFolder = Files.createTempDirectory("baleen").toFile();
  File countFile = new File(tempFolder, "baleen-entitycount.tsv");

  AnalysisEngine entityCounter =
      AnalysisEngineFactory.createEngine(
          EntityCount.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          OUTPUT_FILE,
          countFile.getPath());

  createDocument();

  entityCounter.process(jCas);

  String written = FileUtils.file2String(countFile).trim();
  assertEquals("test1.txt\t2", written);

  // Release the engine, then remove the file before its parent directory.
  entityCounter.destroy();
  countFile.delete();
  tempFolder.delete();
}
 
Example #16
Source File: Html5Test.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the Html5 consumer with external IDs enabled and content-hash IDs disabled, and checks
 * that the output file carries the expected hash-style name instead of the source file name.
 */
@Test
public void testCreateExternalIdFile() throws UIMAException {
  AnalysisEngine html5Engine =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath(),
          Html5.PARAM_USE_EXTERNAL_ID,
          true,
          Html5.PARAM_CONTENT_HASH_AS_ID,
          false);

  jCas.setDocumentText("Hello World!");
  DocumentAnnotation docAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  docAnnotation.setSourceUri("hello.txt");

  html5Engine.process(jCas);

  // NOTE(review): presumably this is the external ID generated for the document above - confirm.
  String expectedName = "734cad14909bedfafb5b273b6b0eb01fbfa639587d217f78ce9639bba41f4415.html";
  File htmlFile = new File(outputFolder, expectedName);
  assertTrue(htmlFile.exists());
}
 
Example #17
Source File: CorefCapitalisationAndApostropheTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Gives the first of two Person annotations an existing ReferenceTarget and checks that, after
 * processing, both persons refer to the single ReferenceTarget present in the CAS.
 */
@Test
public void testOneExistingReferent() throws Exception {
  AnalysisEngine corefEngine =
      AnalysisEngineFactory.createEngine(CorefCapitalisationAndApostrophe.class);

  jCas.setDocumentText(TEXT);

  // Only the first person starts out with a referent.
  ReferenceTarget existingTarget = Annotations.createReferenceTarget(jCas);
  Person seededPerson = Annotations.createPerson(jCas, 0, 5, JAMES);
  seededPerson.setReferent(existingTarget);
  Annotations.createPerson(jCas, 22, 27, JAMES_UC);

  corefEngine.process(jCas);

  int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
  assertEquals(1, targetCount);

  ReferenceTarget sharedTarget = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);
  Person firstPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
  Person secondPerson = JCasUtil.selectByIndex(jCas, Person.class, 1);

  assertEquals(sharedTarget, firstPerson.getReferent());
  assertEquals(sharedTarget, secondPerson.getReferent());
}
 
Example #18
Source File: BannerAETest.java    From bluima with Apache License 2.0 6 votes vote down vote up
/**
 * Puts the test sentence into a separate view named "bla", runs the Banner annotator configured
 * for that view, and checks that the protein shows up in "bla" but not in the CAS passed to the
 * pipeline.
 */
@Test
public void testView() throws Exception {

    JCas jcas = getTestCas("empty!");

    // The sentence under test lives only in the "bla" view.
    JCas blaView = jcas.createView("bla");
    blaView.setDocumentText(TEST_SENTENCE);
    createAnnot(blaView, Sentence.class, 0, TEST_SENTENCE.length());

    AnalysisEngine sentenceSplitter = AnalysisEngineFactory
            .createEngine(NaiveSentenceSplitterAnnotator.class);

    AnalysisEngine bannerEngine = AnalysisEngineFactory.createEngine(
            BannerAnnotator.class, PARAM_VIEW, "bla");

    SimplePipeline.runPipeline(jcas, sentenceSplitter, bannerEngine);

    Collection<Protein> systemViewProteins = select(jcas, Protein.class);
    assertEquals("nothin in system view", 0, systemViewProteins.size());

    Collection<Protein> blaViewProteins = select(jcas.getView("bla"), Protein.class);
    assertEquals("one protein in bla view", 1, blaViewProteins.size());
}
 
Example #19
Source File: ListTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the List annotator creates exactly one Location for a text containing "world"
 * once, and that its value and covered text both equal WORLD.
 */
@Test
public void test() throws Exception {

  AnalysisEngineDescription listDescription =
      AnalysisEngineFactory.createEngineDescription(
          List.class, List.PARAM_TERMS, terms, List.PARAM_TYPE, LOCATION);

  AnalysisEngine listEngine = AnalysisEngineFactory.createEngine(listDescription);

  jCas.setDocumentText("Hello world, this is a test");

  listEngine.process(jCas);

  int locationCount = JCasUtil.select(jCas, Location.class).size();
  assertEquals(1, locationCount);

  Location found = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(WORLD, found.getValue());
  assertEquals(WORLD, found.getCoveredText());

  listEngine.destroy();
}
 
Example #20
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the aggregate description made of the Mate lemmatizer/tagger, the Mate lemma
 * fixer and the normalizer, in that order.
 * 
 * @param resourceConfig
 *            the resource configuration passed on to the normalizer
 * @param lang
 *            the language; its code selects the model file names and configures the
 *            lemma fixer
 * @param mateModelPath
 *            the directory against which the model file names are resolved
 * @return the aggregate engine description
 * @throws TermSuiteException
 *             wrapping any exception raised here, including the failed check for a
 *             missing model file
 */
public static AnalysisEngineDescription createMateAEDesc(ResourceConfig resourceConfig, Lang lang, Path mateModelPath) {
	try {
		AnalysisEngineDescription taggerDesc = AnalysisEngineFactory.createEngineDescription(
			MateLemmatizerTagger.class
		);

		// Model files are named mate-lemma-<code>.model and mate-pos-<code>.model.
		String lemmaModelFile = mateModelPath.resolve("mate-lemma-"+lang.getCode()+".model").toString();
		String posModelFile = mateModelPath.resolve("mate-pos-"+lang.getCode()+".model").toString();
		Preconditions.checkArgument(Files.exists(Paths.get(lemmaModelFile)), "Lemmatizer model does not exist: %s", lemmaModelFile);
		Preconditions.checkArgument(Files.exists(Paths.get(posModelFile)), "Tagger model does not exist: %s", posModelFile);

		ExternalResourceFactory.createDependencyAndBind(
				taggerDesc,
				MateLemmatizerTagger.LEMMATIZER,
				MateLemmatizerModel.class,
				lemmaModelFile);
		ExternalResourceFactory.createDependencyAndBind(
				taggerDesc,
				MateLemmatizerTagger.TAGGER,
				MateTaggerModel.class,
				posModelFile);

		AnalysisEngineDescription lemmaFixerDesc = AnalysisEngineFactory.createEngineDescription(
				MateLemmaFixer.class,
				MateLemmaFixer.LANGUAGE, lang.getCode()
			);

		AnalysisEngineDescription normalizerDesc = createNormalizerAE(resourceConfig, lang, Tagger.MATE);

		return AnalysisEngineFactory.createEngineDescription(
				taggerDesc,
				lemmaFixerDesc,
				normalizerDesc);

	} catch (Exception cause) {
		throw new TermSuiteException(cause);
	}
}
 
Example #21
Source File: CpeBuilder.java    From bluima with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an engine description for the given annotator class and configuration data, and
 * hands it to the {@code add} overload that takes a description.
 *
 * @param annotatorClass the annotator to describe
 * @param configurationData configuration passed through unchanged to
 *        {@code AnalysisEngineFactory.createEngineDescription}
 */
@Override
public void add(Class<? extends JCasAnnotator_ImplBase> annotatorClass,
        Object... configurationData)
        throws InvalidXMLException, ResourceInitializationException, IOException,
        SAXException, CpeDescriptorException {
    add(
            AnalysisEngineFactory.createEngineDescription(
                    annotatorClass, configurationData));
}
 
Example #22
Source File: MongoRegexTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs MongoRegex, configured as case sensitive and backed by a Fongo resource holding the
 * gazetteer data, over TEXT and expects no Location annotations.
 */
@Test
public void test() throws Exception {
  // In-memory resource seeded with the gazetteer data serialized to JSON.
  ExternalResourceDescription fongoResource =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO,
          SharedFongoResource.class,
          FONGO_COLLECTION,
          MONGO_COLL,
          FONGO_DATA,
          objectMapper.writeValueAsString(GAZ_DATA));
  AnalysisEngineDescription regexDescription =
      AnalysisEngineFactory.createEngineDescription(
          MongoRegex.class,
          MONGO,
          fongoResource,
          COLLECTION,
          MONGO_COLL,
          TYPE,
          LOCATION,
          "caseSensitive",
          true,
          REGEX,
          LONDON_REGEX);

  AnalysisEngine regexEngine = AnalysisEngineFactory.createEngine(regexDescription);

  jCas.setDocumentText(TEXT);

  regexEngine.process(jCas);

  int locationCount = JCasUtil.select(jCas, Location.class).size();
  assertEquals(0, locationCount);

  regexEngine.destroy();
}
 
Example #23
Source File: PoStagger.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the description of a {@code PoStagger} engine backed by the maxent POS model found
 * on the classpath at {@code /models/<languageCode>-pos-maxent.bin}.
 * <p>
 * The engine is configured with {@code Sentence} as sentence type, {@code Token} as token type
 * and {@code "pos"} as the POS feature.
 *
 * @param languageCode the prefix of the model file name to load
 * @return the configured engine description
 * @throws ResourceInitializationException if no model resource exists for
 *         {@code languageCode}, or if the description cannot be created
 */
public static AnalysisEngineDescription getDescription(String languageCode) throws ResourceInitializationException {
    String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);
    // getResource returns null when the model is absent; report that explicitly instead of
    // failing with a message-less NullPointerException on toString().
    java.net.URL modelUrl = PoStagger.class.getResource(modelPath);
    if (modelUrl == null) {
        throw new ResourceInitializationException(
                        new IllegalArgumentException("POS model not found on classpath: " + modelPath));
    }
    return AnalysisEngineFactory.createEngineDescription(PoStagger.class,
                    opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
                    ExternalResourceFactory.createExternalResourceDescription(POSModelResourceImpl.class,
                                    modelUrl.toString()),
                    opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName(),
                    opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName(),
                    opennlp.uima.util.UimaUtil.POS_FEATURE_PARAMETER, "pos");
}
 
Example #24
Source File: ListTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that several mentions of the same gazetteer entity (its name and its nicknames) each
 * produce a Location while sharing a single ReferenceTarget.
 */
@Test
public void testReference() throws Exception {
  // Exact-whitespace matching is switched off for this test.

  AnalysisEngineDescription listDescription =
      AnalysisEngineFactory.createEngineDescription(
          List.class,
          List.PARAM_TERMS,
          terms,
          List.PARAM_TYPE,
          LOCATION,
          List.PARAM_EXACT_WHITESPACE,
          false);

  AnalysisEngine listEngine = AnalysisEngineFactory.createEngine(listDescription);

  // The text names New York once and then refers to it by two nicknames.
  jCas.setDocumentText("This text mentions New York (also known as NY and the Big Apple).");

  listEngine.process(jCas);

  // 3 mentions of "New York" and nicknames...
  int locationCount = JCasUtil.select(jCas, Location.class).size();
  assertEquals(3, locationCount);
  // ...but they're all the same entity, so only one ReferenceTarget
  int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
  assertEquals(1, targetCount);

  Location firstLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(NEW_YORK, firstLocation.getValue());

  listEngine.destroy();
}
 
Example #25
Source File: PosUimaTokenizer.java    From Canova with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the default engine: an aggregate of the sentence annotator, the tokenizer, the
 * English POS tagger and the English stemmer, in that order.
 *
 * @return the instantiated aggregate engine
 * @throws RuntimeException wrapping any exception raised while the engine is built
 */
public static AnalysisEngine defaultAnalysisEngine()  {
    try {
        return AnalysisEngineFactory.createEngine(
                AnalysisEngineFactory.createEngineDescription(
                        SentenceAnnotator.getDescription(),
                        TokenizerAnnotator.getDescription(),
                        PoStagger.getDescription("en"),
                        StemmerAnnotator.getDescription("English")));
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}
 
Example #26
Source File: ElasticsearchTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Starts an embedded Elasticsearch instance, creates the Elasticsearch consumer bound to it and
 * to a shared ID generator, and checks that none of the four default types has any documents
 * in the index yet.
 */
@Before
public void before() throws Exception {

  elasticsearch = new EmbeddedElasticsearch5();

  // Shared resource configured with the embedded instance's cluster name and transport port.
  ExternalResourceDescription elasticsearchResource =
      ExternalResourceFactory.createNamedResourceDescription(
          RESOURCE_KEY,
          SharedElasticsearchResource.class,
          PARAM_CLUSTER,
          elasticsearch.getClusterName(),
          PARAM_PORT,
          Integer.toString(elasticsearch.getTransportPort()));
  ExternalResourceDescription idGeneratorResource =
      ExternalResourceFactory.createNamedResourceDescription(
          SharedIdGenerator.RESOURCE_KEY, SharedIdGenerator.class);

  AnalysisEngineDescription consumerDescription =
      AnalysisEngineFactory.createEngineDescription(
          Elasticsearch.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          RESOURCE_KEY,
          elasticsearchResource,
          SharedIdGenerator.RESOURCE_KEY,
          idGeneratorResource);

  ae = AnalysisEngineFactory.createEngine(consumerDescription);
  // NOTE(review): the engine is initialized a second time with an empty specifier and empty
  // parameters; the reason is not visible from this method - confirm it is intentional.
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

  // The index must start out empty for every default type.
  assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_DOCUMENT_TYPE));
  assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_ENTITY_TYPE));
  assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_RELATION_TYPE));
  assertEquals(0, countTypeInIndex(Elasticsearch.DEFAULT_MENTION_TYPE));
}
 
Example #27
Source File: CreateDbWriterDescriptor.java    From ctakes-docker with Apache License 2.0 5 votes vote down vote up
/**
 * Generates the XML descriptor of an {@code I2b2JdbcWriter} engine bound to an Oracle JDBC
 * connection resource and writes it to the file named by the first argument.
 * <p>
 * Connection settings are read from the system properties {@code oracle_host},
 * {@code oracle_user}, {@code oracle_pw} and {@code oracle_table}.
 *
 * @param args {@code args[0]} is the path of the descriptor file to write
 * @throws ResourceInitializationException if the engine description cannot be created
 * @throws IOException if the descriptor file cannot be written
 * @throws SAXException if serializing the description to XML fails
 */
public static void main(String[] args) throws ResourceInitializationException, IOException, SAXException {
    ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(
            JdbcConnectionResourceImpl.class,
            "null",   // the overload is ambiguous because every String is an Object, so this fills the (apparently unneeded) aURL argument
            JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
            "oracle.jdbc.OracleDriver",
            JdbcConnectionResourceImpl.PARAM_URL,
            "jdbc:oracle:thin:@" + System.getProperty("oracle_host"),
            JdbcConnectionResourceImpl.PARAM_USERNAME,
            System.getProperty("oracle_user"),
            JdbcConnectionResourceImpl.PARAM_PASSWORD,
            System.getProperty("oracle_pw"),
            JdbcConnectionResourceImpl.PARAM_KEEP_ALIVE,
            "false",
            AbstractJdbcWriter.PARAM_DB_CONN_RESRC,
            "DbConnectionWrite");

    AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(I2b2JdbcWriter.class,
            I2b2JdbcWriter.PARAM_VECTOR_TABLE,
            System.getProperty("oracle_table"),
            AbstractJdbcWriter.PARAM_DB_CONN_RESRC,
            erd
            );

    // Close the writer so buffered XML is flushed to disk and the file handle is released.
    try (FileWriter descriptorWriter = new FileWriter(args[0])) {
        aed.toXML(descriptorWriter);
    }
}
 
Example #28
Source File: ListTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Checks gazetteer matching with exact-whitespace matching switched off: mentions whose words
 * are separated by several spaces or by a tab still match, giving three Locations in total.
 */
@Test
public void testWhitespaceNormalized() throws Exception {
  // Exact-whitespace matching is disabled for this test.

  AnalysisEngineDescription listDescription =
      AnalysisEngineFactory.createEngineDescription(
          List.class,
          List.PARAM_TERMS,
          terms,
          List.PARAM_TYPE,
          LOCATION,
          List.PARAM_EXACT_WHITESPACE,
          false);

  AnalysisEngine listEngine = AnalysisEngineFactory.createEngine(listDescription);

  // The two words of the term are separated by one space, several spaces, a tab, or a newline.
  jCas.setDocumentText(
      "This text mentions New York, and New    York again, and New	York again, and New \nYork yet again");

  listEngine.process(jCas);

  // Three mentions of "New York" if we reduce any whitespace to a single space (exactWhitespace
  // parameter, which ignores new lines)
  int locationCount = JCasUtil.select(jCas, Location.class).size();
  assertEquals(3, locationCount);

  Location firstLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(NEW_YORK, firstLocation.getValue());

  listEngine.destroy();
}
 
Example #29
Source File: GenerateDescriptors.java    From ctakes-docker with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the engine description loaded from {@code desc/remoteMist.xml} in an aggregate and
 * writes the aggregate's XML descriptor to the file named by the first argument.
 *
 * @param args {@code args[0]} is the path of the descriptor file to write
 * @throws Exception if the description cannot be loaded, aggregated or written
 */
public static void main(String[] args) throws Exception {
    AggregateBuilder builder = new AggregateBuilder();
    builder.add(AnalysisEngineFactory.createEngineDescriptionFromPath("desc/remoteMist.xml"));

    AnalysisEngineDescription aed = builder.createAggregateDescription();
    // Close the writer so buffered XML is flushed to disk and the file handle is released.
    try (FileWriter descriptorWriter = new FileWriter(args[0])) {
        aed.toXML(descriptorWriter);
    }
}
 
Example #30
Source File: ListTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Checks gazetteer matching with exact-whitespace matching switched on: only the mention whose
 * words are separated by a single space produces a Location.
 */
@Test
public void testWhitespaceExact() throws Exception {
  // Exact-whitespace matching is enabled, so whitespace must match the gazetteer entry.

  AnalysisEngineDescription listDescription =
      AnalysisEngineFactory.createEngineDescription(
          List.class,
          List.PARAM_TERMS,
          terms,
          List.PARAM_TYPE,
          LOCATION,
          List.PARAM_EXACT_WHITESPACE,
          true);

  AnalysisEngine listEngine = AnalysisEngineFactory.createEngine(listDescription);

  // The two words of the term are separated by one space, several spaces, a tab, or a newline.
  jCas.setDocumentText(
      "This text mentions New York, and New    York again, and New	York again, and New \nYork yet again");

  listEngine.process(jCas);

  // only one mention of "New York" has the two words separated by a single space (as in the
  // gazetteer)
  int locationCount = JCasUtil.select(jCas, Location.class).size();
  assertEquals(1, locationCount);

  Location firstLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(NEW_YORK, firstLocation.getValue());

  listEngine.destroy();
}