Java Code Examples for org.apache.uima.fit.factory.ExternalResourceFactory#createExternalResourceDescription()

The following examples show how to use org.apache.uima.fit.factory.ExternalResourceFactory#createExternalResourceDescription(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the normalizer engine description: a {@code Lexer} producing
 * {@code WordAnnotation} instances, with the language-specific segment bank
 * bound as an external resource.
 *
 * @param resourceConfig configuration used to locate TermSuite resources
 * @param lang the language of the segment bank to load
 * @param tagger the tagger variant (unused here, kept for interface parity)
 * @return the configured engine description
 * @throws TermSuiteException if the description or resource cannot be created
 */
public static AnalysisEngineDescription createNormalizerAEDesc(ResourceConfig resourceConfig, Lang lang, Tagger tagger) {
	try {
		// Lexer engine emitting WordAnnotation tokens.
		AnalysisEngineDescription engine = AnalysisEngineFactory.createEngineDescription(
				Lexer.class,
				Lexer.PARAM_TYPE, "fr.univnantes.termsuite.types.WordAnnotation");

		// Resolve the segment bank for this language and bind it under its key.
		ExternalResourceDescription segmentBankRes = ExternalResourceFactory.createExternalResourceDescription(
				SegmentBankResource.class,
				getResourceURL(resourceConfig, ResourceType.SEGMENT_BANK, lang));
		ExternalResourceFactory.bindResource(engine, SegmentBank.KEY_SEGMENT_BANK, segmentBankRes);

		return engine;
	} catch (Exception e) {
		throw new TermSuiteException(e);
	}
}
 
Example 2
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the word tokenizer engine description, backed by the TermSuite
 * {@code Lexer} and the language-specific segment bank resource.
 *
 * @param resourceConfig configuration used to locate TermSuite resources
 * @param lang the language of the segment bank to load
 * @return the configured engine description
 * @throws TermSuiteException if the description or resource cannot be created
 */
public static AnalysisEngineDescription createWordTokenizerAEDesc(ResourceConfig resourceConfig, Lang lang) {
	try {
		// Tokenizer engine: Lexer emitting WordAnnotation instances.
		AnalysisEngineDescription tokenizer = AnalysisEngineFactory.createEngineDescription(
				Lexer.class,
				Lexer.PARAM_TYPE, "fr.univnantes.termsuite.types.WordAnnotation");

		// Segment bank for the requested language, bound to the lexer.
		ExternalResourceDescription bank = ExternalResourceFactory.createExternalResourceDescription(
				SegmentBankResource.class,
				getResourceURL(resourceConfig, ResourceType.SEGMENT_BANK, lang));
		ExternalResourceFactory.bindResource(tokenizer, SegmentBank.KEY_SEGMENT_BANK, bank);

		return tokenizer;
	} catch (Exception e) {
		throw new TermSuiteException(e);
	}
}
 
Example 3
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Spots fixed expressions in the CAS and creates {@link FixedExpression}
 * annotation whenever one is found.
 * 
 * @param resourceConfig configuration used to locate the fixed-expression list
 * @param lang the language of the fixed-expression resource to load
 * @return the configured fixed-expression spotter engine description
 */
public static AnalysisEngineDescription createFixedExpressionSpotterAEDesc(ResourceConfig resourceConfig, Lang lang)  {
	try {
		// Spotter matches expressions of up to 5 words; word annotations are
		// kept, covered term occurrence annotations are removed.
		AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
				FixedExpressionSpotter.class,
				FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
				FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, false,
				FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, true
			);
		
		// Fixed-expression list for this language, resolved via the resource config.
		ExternalResourceDescription fixedExprRes = ExternalResourceFactory.createExternalResourceDescription(
				FixedExpressionResource.class, 
				getResourceURL(resourceConfig, ResourceType.FIXED_EXPRESSIONS, lang));
		
		// Bind the list under the key the spotter reads it from.
		ExternalResourceFactory.bindResource(
				ae,
				FixedExpressionResource.FIXED_EXPRESSION_RESOURCE, 
				fixedExprRes
			);
		
		return ae;
	} catch (Exception e) {
		throw new PreparationPipelineException(e);
	}
}
 
Example 4
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a single normalization step: a {@code Mapper} engine that rewrites
 * the {@code WordAnnotation} "tag" feature into the given target feature,
 * using the mapping table loaded from the given URL.
 *
 * @param target fully-qualified feature path the mapping writes to
 * @param mappingFile location of the mapping table resource
 * @return the configured engine description
 * @throws PreparationPipelineException if the description or resource cannot be created
 */
private static AnalysisEngineDescription createSubNormalizerAEDesc(String target, URL mappingFile) {
	try {
		// Mapper reading WordAnnotation:tag and updating the target feature in place.
		AnalysisEngineDescription mapperAE = AnalysisEngineFactory.createEngineDescription(
				Mapper.class,
				Mapper.PARAM_SOURCE, "fr.univnantes.termsuite.types.WordAnnotation:tag",
				Mapper.PARAM_TARGET, target,
				Mapper.PARAM_UPDATE, true);

		// Mapping table bound under the mapper's resource key.
		ExternalResourceDescription mappingResource = ExternalResourceFactory.createExternalResourceDescription(
				MappingResource.class, mappingFile);
		ExternalResourceFactory.bindResource(mapperAE, Mapping.KEY_MAPPING, mappingResource);

		return mapperAE;
	} catch (Exception e) {
		throw new PreparationPipelineException(e);
	}
}
 
Example 5
Source File: FixedExpressionSpotterSpec.java    From termsuite-core with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a ready-to-run {@code FixedExpressionSpotter} engine for tests,
 * backed by the bundled French fixed-expression list.
 *
 * @param removeWordAnnotationFromCas whether the spotter removes word annotations from the CAS
 * @param removeTermOccAnnotationFromCas whether the spotter removes term occurrence annotations from the CAS
 * @return the instantiated analysis engine
 * @throws Exception if the engine or its resource cannot be created
 */
private AnalysisEngine makeAE(boolean removeWordAnnotationFromCas, boolean removeTermOccAnnotationFromCas) throws Exception {
	// Spotter configured for fixed expressions of at most 5 words; removal
	// behavior is driven by the two flags under test.
	AnalysisEngineDescription aeDesc = AnalysisEngineFactory.createEngineDescription(
			FixedExpressionSpotter.class,
			FixedExpressionSpotter.FIXED_EXPRESSION_MAX_SIZE, 5,
			FixedExpressionSpotter.REMOVE_WORD_ANNOTATIONS_FROM_CAS, removeWordAnnotationFromCas,
			FixedExpressionSpotter.REMOVE_TERM_OCC_ANNOTATIONS_FROM_CAS, removeTermOccAnnotationFromCas
		);
	
	/*
	 * The fixed-expression list resource, created with an explicit key so it
	 * can be bound by name below.
	 */
	ExternalResourceDescription fixedExpressionDesc = ExternalResourceFactory.createExternalResourceDescription(
			FixedExpressionResource.FIXED_EXPRESSION_RESOURCE,
			FixedExpressionResource.class, 
			"file:fr/univnantes/termsuite/test/resources/french-fixed-expressions.txt"
	);
	ExternalResourceFactory.bindResource(aeDesc, fixedExpressionDesc);

	// Instantiate the engine from the assembled description.
	AnalysisEngine ae = AnalysisEngineFactory.createEngine(aeDesc);
	return ae;
}
 
Example 6
Source File: CreateDbWriterDescriptor.java    From ctakes-docker with Apache License 2.0 5 votes vote down vote up
/**
 * Generates an XML descriptor for an {@code I2b2JdbcWriter} wired to an Oracle
 * database and writes it to the file named by {@code args[0]}. Connection
 * settings are read from the {@code oracle_*} system properties.
 *
 * @param args args[0] is the path of the descriptor file to write
 * @throws ResourceInitializationException if the descriptions cannot be created
 * @throws IOException if the descriptor file cannot be written
 * @throws SAXException if the descriptor cannot be serialized to XML
 */
public static void main(String[] args) throws ResourceInitializationException, IOException, SAXException {
    // The leading "null" string fills the (unused) aURL argument: the factory
    // method is overloaded and every following varargs value is a String, so an
    // explicit placeholder is needed to select the intended overload.
    ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(
            JdbcConnectionResourceImpl.class,
            "null",
            JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
            "oracle.jdbc.OracleDriver",
            JdbcConnectionResourceImpl.PARAM_URL,
            "jdbc:oracle:thin:@" + System.getProperty("oracle_host"),
            JdbcConnectionResourceImpl.PARAM_USERNAME,
            System.getProperty("oracle_user"),
            JdbcConnectionResourceImpl.PARAM_PASSWORD,
            System.getProperty("oracle_pw"),
            JdbcConnectionResourceImpl.PARAM_KEEP_ALIVE,
            "false",
            AbstractJdbcWriter.PARAM_DB_CONN_RESRC,
            "DbConnectionWrite");

    // Writer engine bound to the connection resource above.
    AnalysisEngineDescription aed = AnalysisEngineFactory.createEngineDescription(I2b2JdbcWriter.class,
            I2b2JdbcWriter.PARAM_VECTOR_TABLE,
            System.getProperty("oracle_table"),
            AbstractJdbcWriter.PARAM_DB_CONN_RESRC,
            erd
            );

    // try-with-resources: the original never closed the FileWriter, so the
    // generated descriptor could be left unflushed/truncated on disk.
    try (FileWriter out = new FileWriter(args[0])) {
        aed.toXML(out);
    }
}
 
Example 7
Source File: NewsleakPreprocessor.java    From newsleak with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Gets the metadata resource description, creating it on first use.
 *
 * <p>Lazily initialized and cached in {@code metadataResourceDesc}. No
 * synchronization is performed, so this is intended for single-threaded
 * setup code.
 *
 * @return the metadata resource description
 */
protected ExternalResourceDescription getMetadataResourceDescription() {
	if (metadataResourceDesc == null) {
		// Points at <dataDirectory>/<metadataFile>; PARAM_RESET_METADATA_FILE is
		// "true" — presumably the file is reset on resource initialization
		// (see MetadataResource for the exact semantics).
		metadataResourceDesc = ExternalResourceFactory.createExternalResourceDescription(MetadataResource.class,
				MetadataResource.PARAM_METADATA_FILE, this.dataDirectory + File.separator + this.metadataFile,
				MetadataResource.PARAM_RESET_METADATA_FILE, "true");
	}
	return metadataResourceDesc;
}
 
Example 8
Source File: NewsleakPreprocessor.java    From newsleak with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Gets the elasticsearch resource description.
 *
 * <p>Unlike the metadata resource, this description is built fresh on every
 * call (no caching). Cluster name, index, host and port come from the
 * instance configuration fields.
 *
 * @param createNewIndex Should be "true" or "false". If "true", the index will be newly created (a pre-existing index with the same name will be overwritten)
 * @return the metadata resource description
 */
protected ExternalResourceDescription getElasticsearchResourceDescription(String createNewIndex) {
	// Document mapping is the bundled 2.4 mapping file; the metadata file path
	// mirrors the one used by the metadata resource.
	ExternalResourceDescription esResource = ExternalResourceFactory.createExternalResourceDescription(
			ElasticsearchResource.class, ElasticsearchResource.PARAM_CREATE_INDEX, createNewIndex,
			ElasticsearchResource.PARAM_CLUSTERNAME, this.esClustername, ElasticsearchResource.PARAM_INDEX,
			this.esIndex, ElasticsearchResource.PARAM_HOST, this.esHost, ElasticsearchResource.PARAM_PORT,
			this.esPort, ElasticsearchResource.PARAM_DOCUMENT_MAPPING_FILE,
			"desc/elasticsearch_mapping_document_2.4.json",
			ElasticsearchResource.PARAM_METADATA_FILE, this.dataDirectory + File.separator + this.metadataFile);
	return esResource;
}
 
Example 9
Source File: InformationExtraction2Postgres.java    From newsleak with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Gets the UIMA reader according to the current configuration.
 *
 * @param type
 *            The reader type (e.g. "csv" for externally preprocessed fulltexts
 *            and metadata, or "hoover" for the Hoover text extraction system)
 * @return the reader
 * @throws ResourceInitializationException
 *             the resource initialization exception
 */
public CollectionReaderDescription getReader(String type) throws ResourceInitializationException {
	CollectionReaderDescription reader = null;
	if (type.equals("csv")) {
		// CSV mode: fulltexts and metadata were produced by an external
		// preprocessing step and are read from the configured data directory.
		reader = CollectionReaderFactory.createReaderDescription(NewsleakCsvStreamReader.class, this.typeSystem,
				NewsleakCsvStreamReader.PARAM_DOCUMENT_FILE, this.documentFile,
				NewsleakCsvStreamReader.PARAM_METADATA_FILE, this.metadataFile,
				NewsleakCsvStreamReader.PARAM_INPUTDIR, this.dataDirectory,
				NewsleakCsvStreamReader.PARAM_DEFAULT_LANG, this.defaultLanguage,
				NewsleakReader.PARAM_DEBUG_MAX_DOCS, this.debugMaxDocuments, NewsleakReader.PARAM_MAX_DOC_LENGTH,
				this.maxDocumentLength);
	} else if (type.equals("hoover")) {
		// Hoover mode: metadata is redirected to a temporary file; the reader
		// pulls documents from the Hoover elasticsearch index via a shared
		// HooverResource client.
		this.metadataFile = this.hooverTmpMetadata;
		ExternalResourceDescription hooverResource = ExternalResourceFactory.createExternalResourceDescription(
				HooverResource.class, HooverResource.PARAM_HOST, this.hooverHost, HooverResource.PARAM_CLUSTERNAME,
				this.hooverClustername, HooverResource.PARAM_INDEX, this.hooverIndex, HooverResource.PARAM_PORT,
				this.hooverPort, HooverResource.PARAM_SEARCHURL, this.hooverSearchUrl);
		reader = CollectionReaderFactory.createReaderDescription(HooverElasticsearchReader.class, this.typeSystem,
				HooverElasticsearchReader.RESOURCE_HOOVER, hooverResource,
				HooverElasticsearchReader.RESOURCE_METADATA, this.getMetadataResourceDescription(),
				NewsleakReader.PARAM_DEBUG_MAX_DOCS, this.debugMaxDocuments, NewsleakReader.PARAM_MAX_DOC_LENGTH,
				this.maxDocumentLength);
	} else {
		// Unknown reader type: log and terminate the whole JVM; the return
		// statement below is never reached on this path.
		this.logger.log(Level.SEVERE, "Unknown reader type: " + type);
		System.exit(1);
	}
	return reader;
}
 
Example 10
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the TreeTagger-based tagging aggregate: the TreeTagger wrapper,
 * followed by a lemma fixer and the tag normalizer.
 *
 * @param resourceConfig configuration used to locate TermSuite resources
 * @param lang the language to tag
 * @param treeTaggerPath TreeTagger installation directory
 * @return the aggregate engine description
 * @throws TermSuiteException if any description or resource cannot be created
 */
public static AnalysisEngineDescription createTreeTaggerAEDesc(ResourceConfig resourceConfig, Lang lang, Path treeTaggerPath) {
	try {
		// TreeTagger wrapper writing "tag" and "lemma" features onto WordAnnotation.
		AnalysisEngineDescription tagger = AnalysisEngineFactory.createEngineDescription(
				TreeTaggerWrapper.class,
				TreeTaggerWrapper.PARAM_ANNOTATION_TYPE, "fr.univnantes.termsuite.types.WordAnnotation",
				TreeTaggerWrapper.PARAM_TAG_FEATURE, "tag",
				TreeTaggerWrapper.PARAM_LEMMA_FEATURE, "lemma",
				TreeTaggerWrapper.PARAM_UPDATE_ANNOTATION_FEATURES, true,
				TreeTaggerWrapper.PARAM_TT_HOME_DIRECTORY, treeTaggerPath.toString());

		// Language-specific TreeTagger parameter file, bound as an external resource.
		ExternalResourceDescription ttConfig = ExternalResourceFactory.createExternalResourceDescription(
				TreeTaggerParameter.class,
				getResourceURL(resourceConfig, ResourceType.TREETAGGER_CONFIG, lang, Tagger.TREE_TAGGER));
		ExternalResourceFactory.bindResource(tagger, TreeTaggerParameter.KEY_TT_PARAMETER, ttConfig);

		// Post-processing steps: fix TreeTagger lemmas, then normalize tags.
		AnalysisEngineDescription lemmaFixer = AnalysisEngineFactory.createEngineDescription(
				TreeTaggerLemmaFixer.class,
				TreeTaggerLemmaFixer.LANGUAGE, lang.getCode());
		AnalysisEngineDescription normalizer = createNormalizerAE(resourceConfig, lang, Tagger.TREE_TAGGER);

		// Aggregate in order: tagger -> lemma fixer -> normalizer.
		return AnalysisEngineFactory.createEngineDescription(tagger, lemmaFixer, normalizer);
	} catch (Exception e) {
		throw new TermSuiteException(e);
	}
}
 
Example 11
Source File: SparkSerializableAnalysisEngine.java    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the HDFS resource-loader locator description with no extra
 * configuration parameters.
 *
 * @return the locator resource description
 */
protected ExternalResourceDescription getLocator() {
    // The explicit empty Object[] pins the (Class, Object...) overload with an
    // empty parameter list.
    ExternalResourceDescription locator =
            ExternalResourceFactory.createExternalResourceDescription(HdfsResourceLoaderLocator.class, new Object[0]);
    return locator;
}
 
Example 12
Source File: CreateDbReaderDescriptor.java    From ctakes-docker with Apache License 2.0 4 votes vote down vote up
/**
 * Generates an XML descriptor for a {@code MemReleaseI2b2CollectionReader}
 * that pulls note blobs from an Oracle i2b2 table, and writes the descriptor
 * to the file named by {@code args[0]}. Connection settings are read from the
 * {@code oracle_*} system properties.
 *
 * @param args args[0] is the path of the descriptor file to write
 * @throws ResourceInitializationException if the descriptions cannot be created
 * @throws IOException if the descriptor file cannot be written
 * @throws SAXException if the descriptor cannot be serialized to XML
 * @throws InvalidXMLException if the generated descriptor XML is invalid
 */
public static void main(String[] args) throws ResourceInitializationException, IOException, SAXException, InvalidXMLException {
    // Select non-empty NOTES blobs from the configured table.
    String sqlStatement = String.format("select encounter_num,patient_num,observation_blob,start_date,provider_id,modifier_cd,concept_cd,instance_num from %s where sourcesystem_cd='NOTES' and observation_blob is not null and length(observation_blob) > 0", System.getProperty("oracle_table"));

    // The leading "null" string fills the (unused) aURL argument: the factory
    // method is overloaded and every following varargs value is a String, so an
    // explicit placeholder is needed to select the intended overload.
    ExternalResourceDescription erd = ExternalResourceFactory.createExternalResourceDescription(
            JdbcConnectionResourceImpl.class,
            "null",
            JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
            "oracle.jdbc.OracleDriver",
            JdbcConnectionResourceImpl.PARAM_URL,
            "jdbc:oracle:thin:@" + System.getProperty("oracle_host"),
            JdbcConnectionResourceImpl.PARAM_USERNAME,
            System.getProperty("oracle_user"),
            JdbcConnectionResourceImpl.PARAM_PASSWORD,
            System.getProperty("oracle_pw"),
            JdbcConnectionResourceImpl.PARAM_KEEP_ALIVE,
            "false",
            I2b2CollectionReader.PARAM_DB_CONN_RESRC,
            "DbConnectionRead");

    // Reader description: document id is encounter_num_patient_num_modifier_cd.
    CollectionReaderDescription aed = CollectionReaderFactory.createReaderDescription(MemReleaseI2b2CollectionReader.class,
            I2b2CollectionReader.PARAM_SQL,
            sqlStatement,
            I2b2CollectionReader.PARAM_DOCTEXT_COL,
            "OBSERVATION_BLOB",
            I2b2CollectionReader.PARAM_DOCID_COLS,
            new String[]{"encounter_num", "patient_num", "modifier_cd"},
            I2b2CollectionReader.PARAM_DOCID_DELIMITER,
            "_",
            I2b2CollectionReader.PARAM_VALUE_PASSPHARASE,
            "",
            I2b2CollectionReader.PARAM_PATIENT_NUM_COL,
            "patient_num",
            I2b2CollectionReader.PARAM_ENCOUNTER_NUM_COL,
            "encounter_num",
            I2b2CollectionReader.PARAM_PROVIDER_ID_COL,
            "provider_id",
            I2b2CollectionReader.PARAM_START_DATE_COL,
            "start_date",
            I2b2CollectionReader.PARAM_CONCEPT_CD_COL,
            "concept_cd",
            I2b2CollectionReader.PARAM_INSTANCE_NUM_COL,
            "instance_num",
            I2b2CollectionReader.PARAM_DB_CONN_RESRC,
            "DbConnectionRead"
            );

    // Declare the named dependency and bind the connection resource to it.
    ExternalResourceFactory.createDependency(aed, "DbConnectionRead", JdbcConnectionResource.class);
    ExternalResourceFactory.bindExternalResource(aed, "DbConnectionRead", erd);

    // try-with-resources: the original never closed the FileWriter, so the
    // generated descriptor could be left unflushed/truncated on disk.
    try (FileWriter out = new FileWriter(args[0])) {
        aed.toXML(out);
    }
}
 
Example 13
Source File: InformationExtraction2Postgres.java    From newsleak with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * The language detection pipeline detects the language of each document and
 * writes this information and the metadata acquired by the the reader
 * temporarily to disk. The extracted fulltext is temporarily stored in the
 * elasticsearch index.
 *
 * @throws Exception
 *             the exception
 */
public void pipelineLanguageDetection() throws Exception {
	// Listener used both by the CPE and by the completion poll below.
	statusListener = new NewsleakStatusCallbackListener(this.logger);

	// check for language support: fail fast (JVM exit) if any configured
	// language is not supported by the detector (ISO 639-3 codes expected)
	HashSet<String> supportedLanguages = LanguageDetector.getSupportedLanguages();
	for (String lang : this.processLanguages) {
		if (!supportedLanguages.contains(lang)) {
			logger.log(Level.SEVERE, "Language " + lang + " not supported (use ISO 639-3 codes)");
			System.exit(1);
		}
	}

	// reader (csv or hoover, per configuration)
	CollectionReaderDescription reader = getReader(this.readerType);

	// language detection annotator: model from resources/, detected languages
	// persisted to data/documentLanguages.ser for later pipeline stages
	ExternalResourceDescription resourceLangDect = ExternalResourceFactory.createExternalResourceDescription(
			LanguageDetectorResource.class, LanguageDetectorResource.PARAM_MODEL_FILE,
			"resources/langdetect-183.bin");
	AnalysisEngineDescription langDetect = AnalysisEngineFactory.createEngineDescription(LanguageDetector.class,
			LanguageDetector.MODEL_FILE, resourceLangDect, LanguageDetector.METADATA_FILE,
			this.getMetadataResourceDescription(), LanguageDetector.PARAM_DEFAULT_LANG, this.defaultLanguage,
			LanguageDetector.DOCLANG_FILE, "data/documentLanguages.ser");

	// elasticsearch writer to store fulltexts; "true" recreates the index,
	// overwriting any pre-existing one of the same name
	AnalysisEngineDescription esWriter = AnalysisEngineFactory.createEngineDescription(
			ElasticsearchDocumentWriter.class, ElasticsearchDocumentWriter.RESOURCE_ESCLIENT,
			this.getElasticsearchResourceDescription("true"),
			ElasticsearchDocumentWriter.PARAM_PARAGRAPHS_AS_DOCUMENTS, this.paragraphsAsDocuments,
			ElasticsearchDocumentWriter.PARAM_MINIMUM_PARAGRAPH_LENGTH, this.paragraphMinimumLength,
			ElasticsearchDocumentWriter.PARAM_MAX_DOC_LENGTH, this.maxDocumentLength);

	// create pipeline: language detection followed by the fulltext writer
	AnalysisEngineDescription ldPipeline = AnalysisEngineFactory.createEngineDescription(langDetect, esWriter);

	// run pipeline in parallel manner with UIMA CPE
	CpeBuilder ldCpeBuilder = new CpeBuilder();
	ldCpeBuilder.setReader(reader);
	ldCpeBuilder.setMaxProcessingUnitThreadCount(this.threads);
	ldCpeBuilder.setAnalysisEngine(ldPipeline);
	CollectionProcessingEngine engine = ldCpeBuilder.createCpe(statusListener);
	engine.process();

	// wait until language detection has finished before running the next
	// information extraction processing step (poll the listener every 500 ms)
	while (statusListener.isProcessing()) {
		Thread.sleep(500);
	}

}
 
Example 14
Source File: CustomResourceTermSuiteAEFactory.java    From termsuite-core with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the regex-based multi-word-term spotter engine description, binding
 * the language-specific MWT rules, stop-word filter and — except for Chinese —
 * the allowed-characters footprint filter.
 *
 * @param resourceConfig configuration used to locate TermSuite resources
 * @param lang the language whose rule resources are loaded
 * @return the configured engine description
 */
public static AnalysisEngineDescription createRegexSpotterAEDesc(ResourceConfig resourceConfig, Lang lang) {
	try {
		// Spotter allows occurrences that overlap each other.
		AnalysisEngineDescription ae = AnalysisEngineFactory.createEngineDescription(
				RegexSpotter.class,
				TokenRegexAE.PARAM_ALLOW_OVERLAPPING_OCCURRENCES, true
			);
		
		// Disable logging of overlapping rules (set after creation via helper).
		addParameters(
				ae, 
				RegexSpotter.LOG_OVERLAPPING_RULES, false);
		
		// Multi-word-term regex rules for this language.
		ExternalResourceDescription mwtRules = ExternalResourceFactory.createExternalResourceDescription(
				RegexListResource.class, 
				getResourceURL(resourceConfig, ResourceType.MWT_RULES, lang));
		
		ExternalResourceFactory.bindResource(
				ae,
				RegexListResource.KEY_TOKEN_REGEX_RULES, 
				mwtRules
			);

		// Chinese is exempt from the character-footprint filter.
		if(lang != Lang.ZH) {
			ExternalResourceDescription allowedCharsRes = ExternalResourceFactory.createExternalResourceDescription(
					CharacterFootprintTermFilter.class, 
					getResourceURL(resourceConfig, ResourceType.ALLOWED_CHARS, lang));
			
			ExternalResourceFactory.bindResource(
					ae,
					RegexSpotter.CHARACTER_FOOTPRINT_TERM_FILTER, 
					allowedCharsRes
					);
		}

		// Stop-word filter applied to spotted candidates.
		ExternalResourceDescription stopWordsRes = ExternalResourceFactory.createExternalResourceDescription(
				DefaultFilterResource.class, 
				getResourceURL(resourceConfig, ResourceType.STOP_WORDS_FILTER, lang));
		
		ExternalResourceFactory.bindResource(
				ae,
				RegexSpotter.STOP_WORD_FILTER, 
				stopWordsRes
			);
		return ae;
	} catch(Exception e) {
		throw new TermSuiteException(e);
	}
}