Java Code Examples for org.apache.uima.fit.factory.ExternalResourceFactory#createNamedResourceDescription()

The following examples show how to use org.apache.uima.fit.factory.ExternalResourceFactory#createNamedResourceDescription(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MongoParagraphsTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the MongoParagraph engine on top of a Fongo-backed resource and keeps a handle on the
 * "paragraphs" collection so the tests can inspect it.
 */
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // The Fongo resource is described the same way a shared Mongo resource would be; "[]" is
  // passed as its initial data
  final ExternalResourceDescription fongoDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "paragraphs", "fongo.data", "[]");

  // Bind the resource under the MONGO key and instantiate the engine
  final AnalysisEngineDescription engineDescription =
      AnalysisEngineFactory.createEngineDescription(
          MongoParagraph.class, MONGO, fongoDescription);
  ae = AnalysisEngineFactory.createEngine(engineDescription);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

  final SharedFongoResource fongo =
      (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  paragraphs = fongo.getDB().getCollection("paragraphs");

  // Every test must begin with an empty collection
  assertEquals(0L, paragraphs.count());
}
 
Example 2
Source File: LocationElasticsearchTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Resets the CAS, creates an embedded Elasticsearch instance and builds the
 * LocationElasticsearch engine against it.
 */
@Before
public void setUp() throws Exception {

  jCas.reset();
  elasticsearch = new EmbeddedElasticsearch5();

  // The shared resource is configured with the embedded instance's transport port and
  // cluster name
  final String transportPort = Integer.toString(elasticsearch.getTransportPort());
  final ExternalResourceDescription resourceDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          RESOURCE_KEY,
          SharedElasticsearchResource.class,
          PARAM_PORT,
          transportPort,
          PARAM_CLUSTER,
          elasticsearch.getClusterName());

  final AnalysisEngineDescription engineDescription =
      AnalysisEngineFactory.createEngineDescription(
          LocationElasticsearch.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          RESOURCE_KEY,
          resourceDescription,
          PARAM_INDEX,
          LOCATION_INDEX);

  ae = AnalysisEngineFactory.createEngine(engineDescription);
}
 
Example 3
Source File: MongoSentencesTest.java    From baleen with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the MongoSentences engine on top of a Fongo-backed resource and keeps a handle on the
 * "sentences" collection so the tests can inspect it.
 */
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Describe the Fongo resource just as a shared Mongo resource would be described; "[]" is
  // passed as its initial data
  final ExternalResourceDescription fongoDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "sentences", "fongo.data", "[]");

  // Bind the resource under the MONGO key and instantiate the engine
  final AnalysisEngineDescription engineDescription =
      AnalysisEngineFactory.createEngineDescription(
          MongoSentences.class, MONGO, fongoDescription);
  ae = AnalysisEngineFactory.createEngine(engineDescription);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

  final SharedFongoResource fongo =
      (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  sentences = fongo.getDB().getCollection("sentences");

  // Every test must begin with an empty collection
  assertEquals(0L, sentences.count());
}
 
Example 4
Source File: NPAtCoordinateTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** Creates the OpenNLP language engine, wiring in one shared model resource per key. */
@Before
public void before() throws UIMAException {
  // One SharedOpenNLPModel description for each resource key the OpenNLP annotator is given
  final ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  final AnalysisEngineDescription openNlpDescription =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          "tokens",
          tokenModel,
          "sentences",
          sentenceModel,
          "posTags",
          posModel,
          "phraseChunks",
          chunkModel);

  languageAE = AnalysisEngineFactory.createEngine(openNlpDescription);
}
 
Example 5
Source File: FileTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates pluralisation in the file gazetteer: with the "plural" option enabled, plural
 * forms in the document text are expected to be annotated as locations.
 *
 * @throws Exception if the engine cannot be created or processing fails
 */
@Test
public void testPlurals() throws Exception {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          FILE_GAZETTEER, SharedFileResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          File.class,
          FILE_GAZETTEER,
          erd,
          FILE_NAME,
          getClass().getResource(GAZETTEER_TXT).getPath(),
          TYPE,
          LOCATION,
          "plural",
          true);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
  try {
    jCas.setDocumentText(
        "There may be many New Yorks and many Parises, but there's only one London.");

    ae.process(jCas);

    assertEquals(3, JCasUtil.select(jCas, Location.class).size());
    Location l1 = JCasUtil.selectByIndex(jCas, Location.class, 0);
    Location l2 = JCasUtil.selectByIndex(jCas, Location.class, 1);
    Location l3 = JCasUtil.selectByIndex(jCas, Location.class, 2);
    assertEquals("New Yorks", l1.getValue());
    assertEquals("Parises", l2.getValue());
    assertEquals("London", l3.getValue());
  } finally {
    // Release the engine even when processing or an assertion fails
    ae.destroy();
  }
}
 
Example 6
Source File: MongoTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Mongo consumer engine with a Fongo-backed resource and a history resource, then
 * captures the history and the collections used by the tests.
 *
 * @throws ResourceInitializationException if the engine cannot be created or initialised
 * @throws ResourceAccessException if a bound resource cannot be looked up
 */
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");
  ExternalResourceDescription historyErd =
      ExternalResourceFactory.createNamedResourceDescription(
          PipelineBuilder.BALEEN_HISTORY, InMemoryBaleenHistory.class);

  // Create the analysis engine
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          Mongo.class,
          MONGO,
          erd,
          "collection",
          "test",
          PipelineBuilder.BALEEN_HISTORY,
          historyErd,
          "outputHistory",
          Boolean.TRUE);
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  SharedFongoResource sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  // The history the tests use is the resource instance bound to the engine. No mock is
  // assigned first, because it would be overwritten here before anything reads it.
  history = (BaleenHistory) ae.getUimaContext().getResourceObject(PipelineBuilder.BALEEN_HISTORY);

  entities = sfr.getDB().getCollection("entities");
  documents = sfr.getDB().getCollection("documents");
  relations = sfr.getDB().getCollection("relations");

  // Ensure we start with no data!
  assertEquals(0L, documents.count());
  assertEquals(0L, entities.count());
  assertEquals(0L, relations.count());
}
 
Example 7
Source File: OpenNLPTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the parent set-up, creates the OpenNLP language engine and loads the sample sentence
 * into the CAS.
 */
@Override
public void beforeTest() throws UIMAException {
  super.beforeTest();

  // One SharedOpenNLPModel description for each resource key the OpenNLP annotator is given
  final ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  final AnalysisEngineDescription languageDescription =
      AnalysisEngineFactory.createEngineDescription(
          uk.gov.dstl.baleen.annotators.language.OpenNLP.class,
          "tokens",
          tokenModel,
          "sentences",
          sentenceModel,
          "posTags",
          posModel,
          "phraseChunks",
          chunkModel);
  aeLanguage = AnalysisEngineFactory.createEngine(languageDescription);

  final String sampleText =
      "This is a mention of John Smith visiting Thomas Brown at the United Nations in New York on the afternoon of February 10th, 2014.";

  jCas.setDocumentText(sampleText);
}
 
Example 8
Source File: MongoRegexTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs MongoRegex with "caseSensitive" enabled over TEXT and expects no Location annotations
 * to be produced.
 *
 * @throws Exception if the engine cannot be created or processing fails
 */
@Test
public void test() throws Exception {
  // Fongo-backed resource pre-loaded with the gazetteer data serialised as JSON
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO,
          SharedFongoResource.class,
          FONGO_COLLECTION,
          MONGO_COLL,
          FONGO_DATA,
          objectMapper.writeValueAsString(GAZ_DATA));
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          MongoRegex.class,
          MONGO,
          erd,
          COLLECTION,
          MONGO_COLL,
          TYPE,
          LOCATION,
          "caseSensitive",
          true,
          REGEX,
          LONDON_REGEX);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
  try {
    jCas.setDocumentText(TEXT);

    ae.process(jCas);

    assertEquals(0, JCasUtil.select(jCas, Location.class).size());
  } finally {
    // Release the engine even when processing or the assertion fails
    ae.destroy();
  }
}
 
Example 9
Source File: OpenNLPParserTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the engines under test: OpenNLP first, followed by OpenNLPParser.
 *
 * @return the OpenNLP and OpenNLPParser engines, in that order
 * @throws ResourceInitializationException if an engine cannot be created
 */
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

  final ExternalResourceDescription parserChunkingDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "parserChunking", SharedOpenNLPModel.class);

  // Add in the OpenNLP implementation too, as it's a prerequisite. In theory OpenNLPParser
  // should be tested in isolation, but in practice it has OpenNLP as a dependency, so it is
  // better to test that they work together.

  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(OpenNLPParser.class, "parserChunking", parserChunkingDesc));
}
 
Example 10
Source File: MongoStemmingTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the shared Fongo resource description once for the class, passing the gazetteer
 * data to it as a JSON string.
 */
@BeforeClass
public static void setup() throws JsonProcessingException {
  // Serialise the gazetteer entries up front so the factory call below stays readable
  final String gazetteerJson = new ObjectMapper().writeValueAsString(GAZ_DATA);

  erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, FONGO_COLLECTION, MONGO_COLL, FONGO_DATA, gazetteerJson);
}
 
Example 11
Source File: MongoStatsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs MongoStats against a file path that does not exist yet and checks the header line and
 * the single statistics row it writes.
 *
 * @throws ResourceInitializationException if the task cannot be created
 * @throws AnalysisEngineProcessException if the task fails while executing
 * @throws IOException if the temporary file cannot be created or read
 */
@Test
public void testNewFile()
    throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
  // Due to limitations in the shared fongo resource we only test document count here!
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          SharedFongoResource.PARAM_FONGO_COLLECTION,
          "documents",
          SharedFongoResource.PARAM_FONGO_DATA,
          objectMapper.writeValueAsString(DATA));

  // Only the unique path is wanted: the file itself is removed again so that it does not
  // exist when the task runs
  File tempFile = File.createTempFile("test", "mongostats");
  tempFile.delete();
  try {

    AnalysisEngine task =
        create(MongoStats.class, "mongo", erd, "file", tempFile.getAbsolutePath());
    try {
      execute(task);
    } finally {
      // Destroy the task even if execution fails; the file is read only after this point
      task.destroy();
    }

    List<String> lines = Files.readAllLines(tempFile.toPath());
    assertEquals(2, lines.size());
    assertEquals("timestamp,documents,entities,relations", lines.get(0));

    String[] split = lines.get(1).split(",");
    assertEquals("3", split[1]);
    assertEquals("0", split[2]);
    assertEquals("0", split[3]);
  } finally {
    tempFile.delete();
  }
}
 
Example 12
Source File: NPElementTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** Prepares the OpenNLP language engine used by the tests, binding its model resources. */
@Before
public void before() throws UIMAException {
  // Each resource key handed to the OpenNLP annotator gets its own SharedOpenNLPModel
  final ExternalResourceDescription tokensResource =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesResource =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posTagsResource =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription phraseChunksResource =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  final AnalysisEngineDescription languageDescription =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          "tokens",
          tokensResource,
          "sentences",
          sentencesResource,
          "posTags",
          posTagsResource,
          "phraseChunks",
          phraseChunksResource);

  languageAE = AnalysisEngineFactory.createEngine(languageDescription);
}
 
Example 13
Source File: HmsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/** Sets up the OpenNLP language engine with a shared model description for each key. */
@Before
public void before() throws UIMAException {
  // Resource descriptions, keyed by the names passed to the OpenNLP annotator below
  final ExternalResourceDescription tokenModelDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentenceModelDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posModelDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunkModelDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  final AnalysisEngineDescription nlpDescription =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          "tokens",
          tokenModelDesc,
          "sentences",
          sentenceModelDesc,
          "posTags",
          posModelDesc,
          "phraseChunks",
          chunkModelDesc);

  languageAE = AnalysisEngineFactory.createEngine(nlpDescription);
}
 
Example 14
Source File: RakeKeywordsTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs RakeKeywords with the maximum keyword count set to 3 and buzzwords disabled, and
 * checks that exactly the three expected keywords are recorded as metadata.
 *
 * @throws ResourceInitializationException if the engine cannot be created
 * @throws AnalysisEngineProcessException if processing the document fails
 */
@Test
public void testMaxNumber()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          3,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);
  try {
    jCas.setDocumentText(
        "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
    ae.process(jCas);

    JCasMetadata metadata = new JCasMetadata(jCas);
    Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

    assertEquals(3, keywords.size());
    assertTrue(keywords.contains("minimal generating sets"));
    assertTrue(keywords.contains("linear diophantine equations"));
    assertTrue(keywords.contains("minimal supporting set"));
  } finally {
    // Release the engine even when processing or an assertion fails
    ae.destroy();
  }
}
 
Example 15
Source File: BaleenCollectionReaderTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Walks a FakeCollectionReader through initialise, progress, hasNext, getNext and destroy,
 * checking the flags the fake records along the way.
 */
@Test
public void test() throws Exception {

  final ExternalResourceDescription extractorDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class);

  final FakeCollectionReader reader =
      (FakeCollectionReader)
          CollectionReaderFactory.createReader(
              FakeCollectionReader.class,
              BaleenCollectionReader.KEY_CONTENT_EXTRACTOR,
              extractorDescription);

  // Initialisation
  final UimaContext uimaContext = UimaContextFactory.createUimaContext();
  reader.initialize(uimaContext);
  assertTrue(reader.initialised);

  // Accessors return values after initialisation
  assertNotNull(reader.getSupport());
  assertNotNull(reader.getMonitor());
  assertNotNull(reader.getProgress());

  final Progress[] reportedProgress = reader.getProgress();
  assertEquals("testunits", reportedProgress[0].getUnit());

  // hasNext() reports false, and the fake's hasNext flag is set afterwards
  assertFalse(reader.hasNext());
  assertTrue(reader.hasNext);

  // getNext(...) sets the fake's getNext flag
  reader.getNext((JCas) null);
  assertTrue(reader.getNext);

  // destroy() sets the fake's closed flag
  reader.destroy();
  assertTrue(reader.closed);
}
 
Example 16
Source File: AbstractRegexNPAnnotatorTest.java    From baleen with Apache License 2.0 5 votes vote down vote up
/**
 * Runs OpenNLP followed by TestAnnotator over an upper-case sentence and expects exactly one
 * Person annotation with the value "JOHN SMITH".
 *
 * @throws Exception if an engine cannot be created or processing fails
 */
@Test
public void testChunks() throws Exception {
  ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  AnalysisEngineDescription descNLP =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc);
  AnalysisEngine aeNLP = AnalysisEngineFactory.createEngine(descNLP);
  try {
    AnalysisEngine ae = AnalysisEngineFactory.createEngine(TestAnnotator.class);
    try {
      jCas.setDocumentText("PERSON JOHN SMITH WAS SEEN ENTERING THE WAREHOUSE");
      // OpenNLP runs first, then the annotator under test
      aeNLP.process(jCas);
      ae.process(jCas);

      assertEquals(1, JCasUtil.select(jCas, Person.class).size());
      assertEquals("JOHN SMITH", JCasUtil.selectByIndex(jCas, Person.class, 0).getValue());
    } finally {
      ae.destroy();
    }
  } finally {
    // Both engines are released whether or not the test passes
    aeNLP.destroy();
  }
}
 
Example 17
Source File: UploadInteractionsToMongoTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/** Creates the Fongo resource description shared by the tests in this class. */
@Before
public void before() {
  // Collection name and initial data handed to the Fongo resource
  final String collectionName = "na";
  final String initialData = "[  ]";

  fongoErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          "fongo.collection",
          collectionName,
          "fongo.data",
          initialData);
}
 
Example 18
Source File: TokenFrequencySummarisationTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the three engines for the test, in order: OpenNLP, WordNetLemmatizer and
 * TokenFrequencySummarisation.
 */
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {
  // Shared model resources for the OpenNLP annotator
  final ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  // Resources for the summarisation and lemmatizer annotators
  final ExternalResourceDescription stopwords =
      ExternalResourceFactory.createNamedResourceDescription(
          "stopwords", SharedStopwordResource.class);
  final ExternalResourceDescription wordnet =
      ExternalResourceFactory.createNamedResourceDescription(
          "wordnet", SharedWordNetResource.class);

  // All descriptions are built before any engine is instantiated
  final AnalysisEngineDescription openNlpDescription =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          "tokens",
          tokenModel,
          "sentences",
          sentenceModel,
          "posTags",
          posModel,
          "phraseChunks",
          chunkModel);
  final AnalysisEngineDescription lemmatizerDescription =
      AnalysisEngineFactory.createEngineDescription(WordNetLemmatizer.class, "wordnet", wordnet);
  final AnalysisEngineDescription summarisationDescription =
      AnalysisEngineFactory.createEngineDescription(
          TokenFrequencySummarisation.class, "stopwords", stopwords);

  // Engines are created in the same order in which they are returned
  return new AnalysisEngine[] {
    AnalysisEngineFactory.createEngine(openNlpDescription),
    AnalysisEngineFactory.createEngine(lemmatizerDescription),
    AnalysisEngineFactory.createEngine(summarisationDescription)
  };
}
 
Example 19
Source File: ReNounPatternGenerationTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the engines for the test (OpenNLP, MaltParser and ReNounPatternDataGenerator, in
 * that order) and captures the "renoun_patterns" collection from the Fongo resource.
 */
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

  // Two facts, each serialised to JSON, passed to the Fongo resource as its data
  final String googleFact =
      new Document()
          .append("sourceValue", "Google")
          .append("value", "CEO")
          .append("targetValue", "Larry Page")
          .toJson();
  final String alphabetFact =
      new Document()
          .append("sourceValue", "Alphabet Limited")
          .append("value", "chief executive officer")
          .append("targetValue", "Larry Page")
          .toJson();
  final ImmutableList<String> facts = ImmutableList.of(googleFact, alphabetFact);

  final ExternalResourceDescription fongoDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO,
          SharedFongoResource.class,
          PARAM_FONGO_COLLECTION,
          "renoun_facts",
          PARAM_FONGO_DATA,
          facts.toString());

  // OpenNLP supplies the POS tags etc., so it needs its shared model resources
  final ExternalResourceDescription tokenModel =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentenceModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posModel =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunkModel =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  // The generator is created first so its Fongo resource can be looked up for the assertions
  final AnalysisEngine patternGenerator =
      createAnalysisEngine(ReNounPatternDataGenerator.class, MONGO, fongoDescription);
  try {
    sfr = (SharedFongoResource) patternGenerator.getUimaContext().getResourceObject(MONGO);
    output = sfr.getDB().getCollection("renoun_patterns");
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }

  final AnalysisEngine openNlp =
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokenModel,
          "sentences",
          sentenceModel,
          "posTags",
          posModel,
          "phraseChunks",
          chunkModel);
  final AnalysisEngine maltParser = createAnalysisEngine(MaltParser.class);

  return asArray(openNlp, maltParser, patternGenerator);
}
 
Example 20
Source File: MaxEntClassifierTrainerTest.java    From baleen with Apache License 2.0 4 votes vote down vote up
/**
 * Runs MaxEntClassifierTrainer once over the test data, writing the model to a temporary
 * file, and captures the document collection from the Fongo resource.
 */
@Before
public void before()
    throws URISyntaxException, ResourceInitializationException, AnalysisEngineProcessException,
        ResourceAccessException {

  final ExternalResourceDescription stopwordsDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          MaxEntClassifierTrainer.KEY_STOPWORDS, SharedStopwordResource.class);

  final List<String> trainingData = new TestData().asList();

  // The trainer writes its model here; an I/O failure is reported as an initialisation error
  try {
    modelPath = Files.createTempFile("model", ".mallet");
  } catch (IOException e) {
    throw new ResourceInitializationException(e);
  }

  final ExternalResourceDescription fongoDescription =
      ExternalResourceFactory.createNamedResourceDescription(
          SharedMongoResource.RESOURCE_KEY,
          SharedFongoResource.class,
          "fongo.collection",
          COLLECTION,
          "fongo.data",
          trainingData.toString());

  final String labelsFile = Paths.get(LABELS_URL.toURI()).toString();
  final AnalysisEngine trainer =
      create(
          MaxEntClassifierTrainer.class,
          KEY_STOPWORDS,
          stopwordsDescription,
          SharedMongoResource.RESOURCE_KEY,
          fongoDescription,
          PARAM_LABELS_FILE,
          labelsFile,
          PARAM_DOCUMENT_COLLECTION,
          COLLECTION,
          PARAM_MODEL_FILE,
          modelPath.toString());

  execute(trainer);

  // Keep the collection the trainer worked on so the tests can inspect it
  final SharedFongoResource fongo =
      (SharedFongoResource)
          trainer.getUimaContext().getResourceObject(SharedMongoResource.RESOURCE_KEY);
  documents = fongo.getDB().getCollection(COLLECTION);
}