org.apache.uima.fit.factory.ExternalResourceFactory Java Examples

The following examples show how to use org.apache.uima.fit.factory.ExternalResourceFactory. They are taken from several open-source projects; the source file and project for each example are listed above it.
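Before the project-specific examples, here is a minimal, self-contained sketch of the typical pattern: describe a shared resource with ExternalResourceFactory, bind it to an analysis engine description, then create and run the engine. This is an illustrative sketch only, assuming the uimaFIT 2.x method names used throughout the examples below; GreetingResource, GreetingAnnotator, and KEY_GREETING are hypothetical names introduced for illustration and do not appear in any of the projects.

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ExternalResourceDescription;
import org.apache.uima.resource.Resource_ImplBase;

public class ExternalResourceFactoryExample {

  /** A trivial shared resource (hypothetical, for illustration only). */
  public static class GreetingResource extends Resource_ImplBase {
    public String getGreeting() {
      return "hello";
    }
  }

  /** An annotator that declares a dependency on the resource via @ExternalResource. */
  public static class GreetingAnnotator extends JCasAnnotator_ImplBase {
    public static final String KEY_GREETING = "greeting";

    @ExternalResource(key = KEY_GREETING)
    private GreetingResource greeting;

    @Override
    public void process(JCas jCas) {
      System.out.println(greeting.getGreeting() + ": " + jCas.getDocumentText());
    }
  }

  public static void main(String[] args) throws Exception {
    // Describe the external resource
    ExternalResourceDescription greetingDesc =
        ExternalResourceFactory.createExternalResourceDescription(GreetingResource.class);

    // Describe the annotator and bind the resource under the annotator's resource key
    AnalysisEngineDescription desc =
        AnalysisEngineFactory.createEngineDescription(GreetingAnnotator.class);
    ExternalResourceFactory.bindResource(desc, GreetingAnnotator.KEY_GREETING, greetingDesc);

    // Create the engine and process a document
    AnalysisEngine ae = AnalysisEngineFactory.createEngine(desc);
    JCas jCas = ae.newJCas();
    jCas.setDocumentText("world");
    ae.process(jCas);
  }
}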
Example #1
Source File: FixedExpressionSpotterSpec.java    From termsuite-core with Apache License 2.0
private AnalysisEngine makeAE(boolean removeWordAnnotationFromCas, boolean removeTermOccAnnotationFromCas) throws Exception {
	AnalysisEngineDescription aeDesc = AnalysisEngineFactory.createEngineDescription(
			FixedExpressionSpotter.class,
			FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
			FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, removeWordAnnotationFromCas,
			FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, removeTermOccAnnotationFromCas
		);
	
	/*
	 * The fixed expression resource
	 */
	ExternalResourceDescription fixedExpressionDesc = ExternalResourceFactory.createExternalResourceDescription(
			FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
			FixedExpressionResource.class, 
			"file:fr/univnantes/termsuite/test/resources/french-fixed-expressions.txt"
	);
	ExternalResourceFactory.bindResource(aeDesc, fixedExpressionDesc);

	AnalysisEngine ae = AnalysisEngineFactory.createEngine(aeDesc);
	return ae;
}
 
Example #2
Source File: MongoParagraphsTest.java    From baleen with Apache License 2.0
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "paragraphs", "fongo.data", "[]");

  // Create the analysis engine
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(MongoParagraph.class, MONGO, erd);
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

  SharedFongoResource sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  paragraphs = sfr.getDB().getCollection("paragraphs");

  // Ensure we start with no data!
  assertEquals(0L, paragraphs.count());
}
 
Example #3
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0
/**
 * Spots fixed expressions in the CAS and creates a {@link FixedExpression}
 * annotation whenever one is found.
 * 
 * @return the analysis engine description for the fixed expression spotter
 */
public static AnalysisEngineDescription createFixedExpressionSpotterAEDesc(ResourceConfig resourceConfig, Lang lang)  {
	try {
		AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
				FixedExpressionSpotter.class,
				FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
				FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, false,
				FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, true
			);
		
		ExternalResourceDescription fixedExprRes = ExternalResourceFactory.createExternalResourceDescription(
				FixedExpressionResource.class, 
				getResourceURL(resourceConfig, ResourceType.FIXED_EXPRESSIONS, lang));
		
		ExternalResourceFactory.bindResource(
				ae,
				FixedExpressionResource.FIXED_EXPRESSION_RESOURCE, 
				fixedExprRes
			);
		
		return ae;
	} catch (Exception e) {
		throw new PreparationPipelineException(e);
	}
}
 
Example #4
Source File: LexicaHelper.java    From bluima with Apache License 2.0
public static AnalysisEngineDescription getConceptMapper(String path,
        AnalysisEngineDescription tokenDesc) throws UIMAException,
        IOException, SAXException {

    String conceptMapper = LEXICA_ROOT + "desc/" + path
            + "ConceptMapper.xml";
    checkArgument(new File(conceptMapper).exists(),
            "no ConceptMapper file at " + conceptMapper);

    String lexicon = LEXICA_ROOT + "resources/" + path + ".xml";
    checkArgument(new File(lexicon).exists(), "no lexicon file at "
            + lexicon);

    AnalysisEngineDescription aed = (AnalysisEngineDescription) createResourceCreationSpecifier(
            conceptMapper, new Object[] { "TokenizerDescriptorPath",
                    getTokenDescPath(tokenDesc), "Stemmer",
                    SCharsStemmer.class.getName() });

    // Create the external resource dependency for the model and bind it
    ExternalResourceFactory.createDependencyAndBind(aed, "DictionaryFile",
            DictionaryResource_impl.class, "file:" + lexicon);
    return aed;
}
 
Example #5
Source File: MongoRelationsTest.java    From baleen with Apache License 2.0
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");

  // Create the analysis engine
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          MongoRelations.class, MONGO, erd, "collection", "test");
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  SharedFongoResource sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);

  relations = sfr.getDB().getCollection("test");

  // Ensure we start with no data!
  assertEquals(0L, relations.count());
}
 
Example #6
Source File: BaleenCollectionReaderTest.java    From baleen with Apache License 2.0
@Test
public void testHasNextLooping() throws Exception {

  ExternalResourceDescription contentExtractor =
      ExternalResourceFactory.createNamedResourceDescription(
          KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class);

  DummyBaleenCollectionReader cr =
      (DummyBaleenCollectionReader)
          CollectionReaderFactory.createReader(
              DummyBaleenCollectionReader.class,
              BaleenCollectionReader.KEY_CONTENT_EXTRACTOR,
              contentExtractor);

  while (cr.hasNext()) {
    JCas jCas = JCasSingleton.getJCasInstance();
    cr.getNext(jCas.getCas());
  }

  cr.destroy();
}
 
Example #7
Source File: PoStagger.java    From Canova with Apache License 2.0
public static AnalysisEngineDescription getDescription(String languageCode)
    throws ResourceInitializationException {
  String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);
  return AnalysisEngineFactory.createEngineDescription(
          PoStagger.class,
          UimaUtil.MODEL_PARAMETER,
          ExternalResourceFactory.createExternalResourceDescription(
                  POSModelResourceImpl.class,
                  PoStagger.class.getResource(modelPath).toString()),
          UimaUtil.SENTENCE_TYPE_PARAMETER,
          Sentence.class.getName(),
          UimaUtil.TOKEN_TYPE_PARAMETER,
          Token.class.getName(),
          UimaUtil.POS_FEATURE_PARAMETER,
          "pos");
}
 
Example #8
Source File: TemporalElasticsearchTest.java    From baleen with Apache License 2.0
@Before
public void setUp() throws Exception {
  elasticsearch = new EmbeddedElasticsearch5();

  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          RESOURCE_KEY,
          SharedElasticsearchResource.class,
          PARAM_PORT,
          Integer.toString(elasticsearch.getTransportPort()),
          PARAM_CLUSTER,
          elasticsearch.getClusterName());

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          TemporalElasticsearch.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          RESOURCE_KEY,
          erd,
          PARAM_INDEX,
          TEMPORAL_INDEX);

  ae = AnalysisEngineFactory.createEngine(aed);
}
 
Example #9
Source File: ElasticsearchTest.java    From baleen with Apache License 2.0
@Before
public void setup() throws UIMAException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          RESOURCE_KEY,
          SharedElasticsearchResource.class,
          PARAM_PORT,
          Integer.toString(elasticsearch.getTransportPort()),
          PARAM_CLUSTER,
          elasticsearch.getClusterName());

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          Elasticsearch.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          RESOURCE_KEY,
          erd);

  ae = AnalysisEngineFactory.createEngine(aed);
}
 
Example #10
Source File: MongoPatternSaverTest.java    From baleen with Apache License 2.0
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  final ExternalResourceDescription erd =
      ExternalResourceFactory.createResourceDescription(
          SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");

  // Create the analysis engine
  final AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          MongoPatternSaver.class, MongoPatternSaver.KEY_MONGO, erd, "collection", "test");
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());

  sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MongoPatternSaver.KEY_MONGO);
}
 
Example #11
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0
public static AnalysisEngineDescription createNormalizerAEDesc(ResourceConfig resourceConfig, Lang lang, Tagger tagger) {
	AnalysisEngineDescription ae;
	try {
		ae = AnalysisEngineFactory.createEngineDescription(
				Lexer.class, 
				Lexer.PARAM_TYPE, "fr.univnantes.termsuite.types.WordAnnotation"
			);
	
		ExternalResourceDescription	segmentBank = ExternalResourceFactory.createExternalResourceDescription(
				SegmentBankResource.class,
				getResourceURL(resourceConfig, ResourceType.SEGMENT_BANK, lang)
			);
				
		ExternalResourceFactory.bindResource(
				ae, 
				SegmentBank.KEY_SEGMENT_BANK, 
				segmentBank);
		return ae;	
	} catch (Exception e) {
		throw new TermSuiteException(e);
	}
}
 
Example #12
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0
public static AnalysisEngineDescription createWordTokenizerAEDesc(ResourceConfig resourceConfig, Lang lang) {
	AnalysisEngineDescription ae;
	try {
		ae = AnalysisEngineFactory.createEngineDescription(
				Lexer.class, 
				Lexer.PARAM_TYPE, "fr.univnantes.termsuite.types.WordAnnotation"
			);
	
		ExternalResourceDescription	segmentBank = ExternalResourceFactory.createExternalResourceDescription(
				SegmentBankResource.class,
				getResourceURL(resourceConfig, ResourceType.SEGMENT_BANK, lang)
			);
				
		ExternalResourceFactory.bindResource(
				ae, 
				SegmentBank.KEY_SEGMENT_BANK, 
				segmentBank);
		return ae;	
	} catch (Exception e) {
		throw new TermSuiteException(e);
	}
}
 
Example #13
Source File: ElasticsearchRestTest.java    From baleen with Apache License 2.0
@Before
public void setup() throws UIMAException, IOException {

  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          ELASTICSEARCH,
          SharedElasticsearchRestResource.class,
          PARAM_URL,
          elasticsearch.getHttpUrl());

  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          ElasticsearchRest.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          ELASTICSEARCH,
          erd);

  ae = AnalysisEngineFactory.createEngine(aed);
}
 
Example #14
Source File: AssignTypeToInteractionTest.java    From baleen with Apache License 2.0
@BeforeClass
public static void before() {
  fongoErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          "fongo.collection",
          "relationTypes",
          "fongo.data",
          "[ { \"source\": \"uk.gov.dstl.baleen.types.common.Person\", \"target\": \"uk.gov.dstl.baleen.types.semantic.Location\", \"type\": \"noun\", \"subType\": \"attack\", \"pos\": \"NOUN\", \"value\":[ \"attack\", \"attacking\", \"attacked\" ] },"
              + "{ \"source\": \"uk.gov.dstl.baleen.types.common.Person\", \"target\": \"uk.gov.dstl.baleen.types.semantic.Location\", \"type\": \"verb\", \"subType\": \"attack\", \"pos\": \"VERB\", \"value\":[ \"attack\" ] } ]");
}
 
Example #15
Source File: ReNounNoAttributesSeedFactsTest.java    From baleen with Apache License 2.0
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(ReNounDefaultSeedsRelationshipAnnotator.class));
}
 
Example #16
Source File: ReNounCoreferenceSeedFactsTest.java    From baleen with Apache License 2.0
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(
          ReNounDefaultSeedsRelationshipAnnotator.class,
          PARAM_ONTOLOGY_ATTRIBUTES,
          new String[] {"CEO", "chief executive officer"},
          PARAM_REQUIRE_COREFERENCE,
          true));
}
 
Example #17
Source File: OdinTest.java    From baleen with Apache License 2.0
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  File file = new File(RULES_FILE.getFile());

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(Odin.class, PARAM_RULES, file.getAbsolutePath()));
}
 
Example #18
Source File: ReNounDependencySeedFactsTest.java    From baleen with Apache License 2.0
@Override
protected AnalysisEngine[] createAnalysisEngines() throws ResourceInitializationException {

  // Use OpenNlp to generate the POS etc for us
  final ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  final ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  final ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  final ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  return asArray(
      createAnalysisEngine(
          OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc),
      createAnalysisEngine(MaltParser.class),
      createAnalysisEngine(
          ReNounDefaultSeedsRelationshipAnnotator.class,
          PARAM_ONTOLOGY_ATTRIBUTES,
          new String[] {"CEO", "chief executive officer"}));
}
 
Example #19
Source File: RelationTypeFilterTest.java    From baleen with Apache License 2.0
@Before
public void before() {
  fongoErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          "fongo.collection",
          "relationTypes",
          "fongo.data",
          "[ { \"source\": \"uk.gov.dstl.baleen.types.common.Person\", \"target\": \"uk.gov.dstl.baleen.types.semantic.Location\", \"type\": \"went\", \"subType\": \"past\", \"pos\": \"VBG\", \"value\":[ \"went\" ] } ]");
}
 
Example #20
Source File: MongoStatsTest.java    From baleen with Apache License 2.0
@Test
public void testNewFile()
    throws ResourceInitializationException, AnalysisEngineProcessException, IOException {
  // Due to limitations in the shared fongo resource we only test document count here!
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          SharedFongoResource.PARAM_FONGO_COLLECTION,
          "documents",
          SharedFongoResource.PARAM_FONGO_DATA,
          objectMapper.writeValueAsString(DATA));

  File tempFile = File.createTempFile("test", "mongostats");
  tempFile.delete();
  try {

    AnalysisEngine task =
        create(MongoStats.class, "mongo", erd, "file", tempFile.getAbsolutePath());
    execute(task);
    task.destroy();

    List<String> lines = Files.readAllLines(tempFile.toPath());
    assertEquals(2, lines.size());
    assertEquals("timestamp,documents,entities,relations", lines.get(0));

    String[] split = lines.get(1).split(",");
    assertEquals("3", split[1]);
    assertEquals("0", split[2]);
    assertEquals("0", split[3]);
  } finally {
    tempFile.delete();
  }
}
 
Example #21
Source File: MongoTest.java    From baleen with Apache License 2.0
@Before
public void setUp() throws ResourceInitializationException, ResourceAccessException {
  // Create a description of an external resource - a fongo instance, in the same way we would
  // have created a shared mongo resource
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          MONGO, SharedFongoResource.class, "fongo.collection", "test", "fongo.data", "[]");
  ExternalResourceDescription historyErd =
      ExternalResourceFactory.createNamedResourceDescription(
          PipelineBuilder.BALEEN_HISTORY, InMemoryBaleenHistory.class);

  history = Mockito.mock(BaleenHistory.class);

  // Create the analysis engine
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          Mongo.class,
          MONGO,
          erd,
          "collection",
          "test",
          PipelineBuilder.BALEEN_HISTORY,
          historyErd,
          "outputHistory",
          Boolean.TRUE);
  ae = AnalysisEngineFactory.createEngine(aed);
  ae.initialize(new CustomResourceSpecifier_impl(), Collections.emptyMap());
  SharedFongoResource sfr = (SharedFongoResource) ae.getUimaContext().getResourceObject(MONGO);
  history = (BaleenHistory) ae.getUimaContext().getResourceObject(PipelineBuilder.BALEEN_HISTORY);

  entities = sfr.getDB().getCollection("entities");
  documents = sfr.getDB().getCollection("documents");
  relations = sfr.getDB().getCollection("relations");

  // Ensure we start with no data!
  assertEquals(0L, documents.count());
  assertEquals(0L, entities.count());
  assertEquals(0L, relations.count());
}
 
Example #22
Source File: EntityLinkingAnnotatorTest.java    From baleen with Apache License 2.0
@Before
public void setup() throws ResourceInitializationException {

  stopwords =
      ExternalResourceFactory.createNamedResourceDescription(
          EntityLinkingAnnotator.KEY_STOPWORDS, SharedStopwordResource.class);

  Set<EntityInformation<Person>> entityInformationSet = new HashSet<>();
  entityInformationSet.add(entityInformation);
}
 
Example #23
Source File: RakeKeywordsTest.java    From baleen with Apache License 2.0
@Test
public void testLongDocument() throws Exception {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          true);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      new String(Files.readAllBytes(Paths.get(getClass().getResource("turing.txt").toURI()))));
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Optional<String> keyword = metadata.find(KEYWORD_METADATA_KEY);

  assertTrue(keyword.isPresent());

  ae.destroy();
}
 
Example #24
Source File: RakeKeywordsTest.java    From baleen with Apache License 2.0
@Test
public void testBadStemmer()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false,
          RakeKeywords.PARAM_STEMMING,
          "NotARealStemmer");

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(9, keywords.size());
  assertTrue(keywords.contains("minimal generating sets"));
  assertTrue(keywords.contains("linear diophantine equations"));
  assertTrue(keywords.contains("minimal supporting set"));
  assertTrue(keywords.contains("minimal set"));
  assertTrue(keywords.contains("linear constraints"));
  assertTrue(keywords.contains("natural numbers"));
  assertTrue(keywords.contains("strict inequations"));
  assertTrue(keywords.contains("nonstrict inequations"));
  assertTrue(keywords.contains("upper bounds"));

  ae.destroy();
}
 
Example #25
Source File: RakeKeywordsTest.java    From baleen with Apache License 2.0
@Test
public void testCustomStoplist()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false,
          RakeKeywords.PARAM_STOPLIST,
          getClass().getResource("exampleStoplist.txt").getPath());

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText("Bill and Ben went off to the shops in London town.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(1, keywords.size());
  assertTrue(keywords.contains("london town"));

  ae.destroy();
}
 
Example #26
Source File: IdentifyInteractionsTest.java    From baleen with Apache License 2.0
@Before
public void before() {
  fongoErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "mongo",
          SharedFongoResource.class,
          "fongo.collection",
          "patterns",
          "fongo.data",
          "[ { \"_id\":\"1\", \"words\": [ { \"lemma\":\"went\", \"pos\":\"VERB\"}], \"source\":{\"type\":\"Person\"}, \"target\":{\"type\":\"Location\"}}, { \"_id\":\"2\", \"words\": [ { \"lemma\":\"went\", \"pos\":\"VERB\"}, { \"lemma\":\"after\", \"pos\":\"VERB\"} ], \"source\":{ \"type\":\"Person\" }, \"target\":{\"type\":\"Person\" } } ]");

  wordnetErd =
      ExternalResourceFactory.createNamedResourceDescription(
          "wordnet", SharedWordNetResource.class);
}
 
Example #27
Source File: RakeKeywordsTest.java    From baleen with Apache License 2.0
@Test
public void testMaxNumber()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          3,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(3, keywords.size());
  assertTrue(keywords.contains("minimal generating sets"));
  assertTrue(keywords.contains("linear diophantine equations"));
  assertTrue(keywords.contains("minimal supporting set"));

  ae.destroy();
}
 
Example #28
Source File: RakeKeywordsTest.java    From baleen with Apache License 2.0
@Test
public void testNoBuzzwords()
    throws ResourceInitializationException, AnalysisEngineProcessException {
  ExternalResourceDescription erd =
      ExternalResourceFactory.createNamedResourceDescription(
          STOPWORDS, SharedStopwordResource.class);
  AnalysisEngineDescription aed =
      AnalysisEngineFactory.createEngineDescription(
          RakeKeywords.class,
          STOPWORDS,
          erd,
          RakeKeywords.PARAM_MAX_KEYWORDS,
          12,
          RakeKeywords.PARAM_ADD_BUZZWORDS,
          false);

  AnalysisEngine ae = AnalysisEngineFactory.createEngine(aed);

  jCas.setDocumentText(
      "Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions and algorithms of construction of minimal generating sets of solutions for all types of systems are given. These criteria and the corresponding algorithms for contructing a minimal supporting set of solutions can be used in solving all the considered types of systems and systems of mixed types.");
  ae.process(jCas);

  JCasMetadata metadata = new JCasMetadata(jCas);
  Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY);

  assertEquals(9, keywords.size());
  assertTrue(keywords.contains("minimal generating sets"));
  assertTrue(keywords.contains("linear diophantine equations"));
  assertTrue(keywords.contains("minimal supporting set"));
  assertTrue(keywords.contains("minimal set"));
  assertTrue(keywords.contains("linear constraints"));
  assertTrue(keywords.contains("natural numbers"));
  assertTrue(keywords.contains("strict inequations"));
  assertTrue(keywords.contains("nonstrict inequations"));
  assertTrue(keywords.contains("upper bounds"));

  ae.destroy();
}
 
Example #29
Source File: TokenizerAnnotator.java    From deeplearning4j with Apache License 2.0
public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
    String modelPath = String.format("/models/%s-token.bin", "en");
    return AnalysisEngineFactory.createEngineDescription(ConcurrentTokenizer.class,
                    opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
                    ExternalResourceFactory.createExternalResourceDescription(TokenizerModelResourceImpl.class,
                                    ConcurrentTokenizer.class.getResource(modelPath).toString()),
                    opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER, Sentence.class.getName(),
                    opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER, Token.class.getName());
}
 
Example #30
Source File: OpenNLPTest.java    From baleen with Apache License 2.0
@Override
public void beforeTest() throws UIMAException {
  super.beforeTest();

  ExternalResourceDescription tokensDesc =
      ExternalResourceFactory.createNamedResourceDescription("tokens", SharedOpenNLPModel.class);
  ExternalResourceDescription sentencesDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "sentences", SharedOpenNLPModel.class);
  ExternalResourceDescription posDesc =
      ExternalResourceFactory.createNamedResourceDescription("posTags", SharedOpenNLPModel.class);
  ExternalResourceDescription chunksDesc =
      ExternalResourceFactory.createNamedResourceDescription(
          "phraseChunks", SharedOpenNLPModel.class);

  AnalysisEngineDescription descLanguage =
      AnalysisEngineFactory.createEngineDescription(
          uk.gov.dstl.baleen.annotators.language.OpenNLP.class,
          "tokens",
          tokensDesc,
          "sentences",
          sentencesDesc,
          "posTags",
          posDesc,
          "phraseChunks",
          chunksDesc);
  aeLanguage = AnalysisEngineFactory.createEngine(descLanguage);

  String text =
      "This is a mention of John Smith visiting Thomas Brown at the United Nations in New York on the afternoon of February 10th, 2014.";

  jCas.setDocumentText(text);
}