opennlp.tools.util.TrainingParameters Java Examples
The following examples show how to use
opennlp.tools.util.TrainingParameters.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OpenNlpDoccatRecommender.java From inception with Apache License 2.0 | 6 votes |
@Override public void train(RecommenderContext aContext, List<CAS> aCasses) throws RecommendationException { List<DocumentSample> docSamples = extractSamples(aCasses); if (docSamples.size() < 2) { LOG.info("Not enough training data: [{}] items", docSamples.size()); return; } // The beam size controls how many results are returned at most. But even if the user // requests only few results, we always use at least the default bean size recommended by // OpenNLP int beamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE); TrainingParameters params = traits.getParameters(); params.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize)); DoccatModel model = train(docSamples, params); aContext.put(KEY_MODEL, model); }
Example #2
Source File: LanguageDetectorAndTrainingDataUnitTest.java From tutorials with MIT License | 6 votes |
/**
 * Trains a naive Bayes language detector on the sample file and checks the languages
 * (and confidences) predicted for a Portuguese sentence.
 *
 * @throws FileNotFoundException
 *             if the sample file cannot be found
 * @throws IOException
 *             if reading the training data fails
 */
@Test
public void givenLanguageDictionary_whenLanguageDetect_thenLanguageIsDetected()
        throws FileNotFoundException, IOException {
    InputStreamFactory dataIn = new MarkableFileInputStreamFactory(
            new File("src/main/resources/models/DoccatSample.txt"));

    TrainingParameters params = new TrainingParameters();
    params.put(TrainingParameters.ITERATIONS_PARAM, 100);
    params.put(TrainingParameters.CUTOFF_PARAM, 5);
    params.put("DataIndexer", "TwoPass");
    params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

    LanguageDetectorModel model;
    // Close the sample stream once training is done; the line stream is typed
    // (ObjectStream<String>) by construction instead of being held as a raw type.
    try (LanguageDetectorSampleStream sampleStream = new LanguageDetectorSampleStream(
            new PlainTextByLineStream(dataIn, "UTF-8"))) {
        model = LanguageDetectorME.train(sampleStream, params, new LanguageDetectorFactory());
    }

    LanguageDetector ld = new LanguageDetectorME(model);
    Language[] languages = ld.predictLanguages("estava em uma marcenaria na Rua Bruno");

    assertThat(Arrays.asList(languages))
            .extracting("lang", "confidence")
            .contains(
                    tuple("pob", 0.9999999950605625),
                    tuple("ita", 4.939427661577956E-9),
                    tuple("spa", 9.665954064665144E-15),
                    tuple("fra", 8.250349924885834E-25));
}
Example #3
Source File: AbstractTaggerTrainer.java From ixa-pipe-pos with Apache License 2.0 | 6 votes |
public final POSModel train(final TrainingParameters params) { // features if (getPosTaggerFactory() == null) { throw new IllegalStateException( "Classes derived from AbstractTrainer must " + " create a POSTaggerFactory features!"); } // training model POSModel trainedModel = null; POSEvaluator posEvaluator = null; try { trainedModel = POSTaggerME.train(this.lang, this.trainSamples, params, getPosTaggerFactory()); final POSTaggerME posTagger = new POSTaggerME(trainedModel); posEvaluator = new POSEvaluator(posTagger); posEvaluator.evaluate(this.testSamples); } catch (final IOException e) { System.err.println("IO error while loading training and test sets!"); e.printStackTrace(); System.exit(1); } System.out.println("Final result: " + posEvaluator.getWordAccuracy()); return trainedModel; }
Example #4
Source File: AbstractTaggerTrainer.java From ixa-pipe-pos with Apache License 2.0 | 6 votes |
/**
 * Creates the trainer from the given parameters: reads the language, opens the
 * training and test sets as sample streams, opens the training set a second time
 * for dictionary creation, and reads the automatic-dictionary and n-gram cutoffs.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           the io exceptions
 */
public AbstractTaggerTrainer(final TrainingParameters params)
    throws IOException {
  this.lang = Flags.getLanguage(params);
  final String trainSetPath = Flags.getDataSet("TrainSet", params);
  final String testSetPath = Flags.getDataSet("TestSet", params);

  this.trainSamples = new MorphoSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainSetPath));
  this.testSamples = new MorphoSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(testSetPath));
  // The dictionary samples are read from the training set through a separate stream.
  setDictSamples(new MorphoSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainSetPath)));

  this.dictCutOff = Flags.getAutoDictFeatures(params);
  this.ngramCutOff = Flags.getNgramDictFeatures(params);
}
Example #5
Source File: AbstractLemmatizerTrainer.java From ixa-pipe-pos with Apache License 2.0 | 6 votes |
public final LemmatizerModel train(final TrainingParameters params) { // features if (getLemmatizerFactory() == null) { throw new IllegalStateException( "Classes derived from AbstractLemmatizerTrainer must " + " create a LemmatizerFactory features!"); } // training model LemmatizerModel trainedModel = null; LemmatizerEvaluator lemmatizerEvaluator = null; try { trainedModel = LemmatizerME.train(this.lang, this.trainSamples, params, getLemmatizerFactory()); final LemmatizerME lemmatizer = new LemmatizerME(trainedModel); lemmatizerEvaluator = new LemmatizerEvaluator(lemmatizer); lemmatizerEvaluator.evaluate(this.testSamples); } catch (final IOException e) { System.err.println("IO error while loading training and test sets!"); e.printStackTrace(); System.exit(1); } System.out.println("Final result: " + lemmatizerEvaluator.getWordAccuracy()); return trainedModel; }
Example #6
Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0 | 6 votes |
@Override public void train(RecommenderContext aContext, List<CAS> aCasses) throws RecommendationException { List<NameSample> nameSamples = extractNameSamples(aCasses); if (nameSamples.size() < 2) { LOG.info("Not enough training data: [{}] items", nameSamples.size()); return; } // The beam size controls how many results are returned at most. But even if the user // requests only few results, we always use at least the default bean size recommended by // OpenNLP int beamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE); TrainingParameters params = traits.getParameters(); params.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize)); TokenNameFinderModel model = train(nameSamples, params); aContext.put(KEY_MODEL, model); }
Example #7
Source File: OpenNlpPosRecommender.java From inception with Apache License 2.0 | 6 votes |
/**
 * Trains a POS model from the given samples using a default {@code POSTaggerFactory}
 * and the language code {@code "unknown"}.
 *
 * @param aPosSamples
 *            the training samples
 * @param aParameters
 *            the training parameters
 * @return the trained model, or {@code null} if there are no samples
 * @throws RecommendationException
 *             if an I/O error occurs during training
 */
@Nullable
private POSModel train(List<POSSample> aPosSamples, TrainingParameters aParameters)
    throws RecommendationException
{
    if (aPosSamples.isEmpty()) {
        return null;
    }

    try (POSSampleStream sampleStream = new POSSampleStream(aPosSamples)) {
        return POSTaggerME.train("unknown", sampleStream, aParameters,
                new POSTaggerFactory());
    }
    catch (IOException e) {
        throw new RecommendationException("Error training model", e);
    }
}
Example #8
Source File: OpenNlpPosRecommender.java From inception with Apache License 2.0 | 6 votes |
@Override public void train(RecommenderContext aContext, List<CAS> aCasses) throws RecommendationException { List<POSSample> posSamples = extractPosSamples(aCasses); if (posSamples.size() < 2) { LOG.info("Not enough training data: [{}] items", posSamples.size()); return; } // The beam size controls how many results are returned at most. But even if the user // requests only few results, we always use at least the default bean size recommended by // OpenNLP int beamSize = Math.max(maxRecommendations, POSTaggerME.DEFAULT_BEAM_SIZE); TrainingParameters params = traits.getParameters(); params.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize)); POSModel model = train(posSamples, params); aContext.put(KEY_MODEL, model); }
Example #9
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the "FeatureSet" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured feature set, or {@code Flags.DEFAULT_FEATURESET_FLAG} when
 *         the setting is absent
 */
public static String getFeatureSet(final TrainingParameters params) {
  final String configured = params.getSettings().get("FeatureSet");
  if (configured == null) {
    return Flags.DEFAULT_FEATURESET_FLAG;
  }
  return configured;
}
Example #10
Source File: OpenNlpDoccatRecommenderTraits.java From inception with Apache License 2.0 | 5 votes |
/**
 * Builds the OpenNLP training parameters for these traits: starts from the OpenNLP
 * defaults, switches verbose output off and applies the configured number of
 * iterations, cutoff and thread count.
 *
 * @return a newly created parameter set
 */
public TrainingParameters getParameters()
{
    TrainingParameters trainingParams = TrainingParameters.defaultParams();

    trainingParams.put(AbstractTrainer.VERBOSE_PARAM, false);
    trainingParams.put(TrainingParameters.ITERATIONS_PARAM, iterations);
    trainingParams.put(TrainingParameters.CUTOFF_PARAM, cutoff);
    trainingParams.put(TrainingParameters.THREADS_PARAM, numThreads);

    return trainingParams;
}
Example #11
Source File: POSCrossValidator.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/** * Get the postagger cross validator. * * @param params * the training parameters * @return the pos tagger cross validator */ private POSTaggerCrossValidator getPOSTaggerCrossValidator( final TrainingParameters params) { final File dictPath = new File(Flags.getDictionaryFeatures(params)); // features if (this.posTaggerFactory == null) { throw new IllegalStateException( "You must create the POSTaggerFactory features!"); } POSTaggerCrossValidator validator = null; if (dictPath.getName().equals(Flags.DEFAULT_DICT_PATH)) { if (this.dictCutOff == Flags.DEFAULT_DICT_CUTOFF) { validator = new POSTaggerCrossValidator(this.lang, params, null, null, null, this.posTaggerFactory.getClass().getName(), this.listeners .toArray(new POSTaggerEvaluationMonitor[this.listeners.size()])); } else { validator = new POSTaggerCrossValidator(this.lang, params, null, null, this.dictCutOff, this.posTaggerFactory.getClass().getName(), this.listeners .toArray(new POSTaggerEvaluationMonitor[this.listeners.size()])); } } else { if (this.dictCutOff == Flags.DEFAULT_DICT_CUTOFF) { validator = new POSTaggerCrossValidator(this.lang, params, dictPath, null, null, this.posTaggerFactory.getClass().getName(), this.listeners .toArray(new POSTaggerEvaluationMonitor[this.listeners.size()])); } else { validator = new POSTaggerCrossValidator(this.lang, params, dictPath, null, this.dictCutOff, this.posTaggerFactory.getClass().getName(), this.listeners .toArray(new POSTaggerEvaluationMonitor[this.listeners.size()])); } } return validator; }
Example #12
Source File: POSCrossValidator.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Registers the evaluation listeners selected by the "EvaluationType" setting:
 * "error" adds a {@code POSEvaluationErrorListener}, "detailed" creates and adds
 * the fine-grained report listener. The comparison ignores case. When the setting
 * is absent or has any other value, no listener is registered.
 *
 * @param params
 *          the training parameters
 */
private void getEvalListeners(final TrainingParameters params) {
  // Look the setting up once; putting the literal first keeps the comparison
  // null-safe, so a missing setting no longer causes a NullPointerException.
  final String evaluationType = params.getSettings().get("EvaluationType");
  if ("error".equalsIgnoreCase(evaluationType)) {
    this.listeners.add(new POSEvaluationErrorListener());
  }
  if ("detailed".equalsIgnoreCase(evaluationType)) {
    this.detailedListener = new POSTaggerFineGrainedReportListener();
    this.listeners.add(this.detailedListener);
  }
}
Example #13
Source File: POSCrossValidator.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Selects the POS tagger factory from the configured feature set: "Opennlp"
 * (compared ignoring case) yields a plain {@code POSTaggerFactory}, anything else
 * a {@code BaselineFactory}.
 *
 * @param params
 *          the training parameters
 */
private void createPOSFactory(final TrainingParameters params) {
  final boolean useOpennlpFeatures = Flags.getFeatureSet(params)
      .equalsIgnoreCase("Opennlp");
  if (useOpennlpFeatures) {
    this.posTaggerFactory = new POSTaggerFactory();
    return;
  }
  this.posTaggerFactory = new BaselineFactory();
}
Example #14
Source File: POSCrossValidator.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Creates the cross validator from the given parameters: reads the language, opens
 * the training set as a sample stream, reads the automatic-dictionary cutoff and
 * the number of folds, then creates the POS tagger factory and the evaluation
 * listeners.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           the io exceptions
 */
public POSCrossValidator(final TrainingParameters params) throws IOException {
  this.lang = Flags.getLanguage(params);
  final String trainSetPath = Flags.getDataSet("TrainSet", params);
  this.trainSamples = new WordTagSampleStream(
      InputOutputUtils.readFileIntoMarkableStreamFactory(trainSetPath));
  this.dictCutOff = Flags.getAutoDictFeatures(params);
  this.folds = Flags.getFolds(params);
  createPOSFactory(params);
  getEvalListeners(params);
}
Example #15
Source File: OpenNlpDoccatRecommender.java From inception with Apache License 2.0 | 5 votes |
/**
 * Trains a document categorizer model from the given samples using a default
 * {@code DoccatFactory} and the language code {@code "unknown"}.
 *
 * @param aSamples
 *            the training samples
 * @param aParameters
 *            the training parameters
 * @return the trained model
 * @throws RecommendationException
 *             if an I/O error occurs during training
 */
private DoccatModel train(List<DocumentSample> aSamples, TrainingParameters aParameters)
    throws RecommendationException
{
    try (DocumentSampleStream sampleStream = new DocumentSampleStream(aSamples)) {
        return DocumentCategorizerME.train("unknown", sampleStream, aParameters,
                new DoccatFactory());
    }
    catch (IOException e) {
        throw new RecommendationException(
                "Exception during training the OpenNLP Document Categorizer model.", e);
    }
}
Example #16
Source File: FixedTrainer.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Creates the trainer: delegates to the superclass constructor, sets the POS tagger
 * factory obtained from {@code getTrainerFactory}, then creates the tag dictionary
 * from the configured dictionary path and the automatic dictionary from the
 * dictionary samples and cutoff.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           the io exception
 */
public FixedTrainer(final TrainingParameters params) throws IOException {
  super(params);
  // Read the dictionary path before the factory is installed, as the original does.
  final String tagDictionaryPath = Flags.getDictionaryFeatures(params);
  setPosTaggerFactory(getTrainerFactory(params));
  createTagDictionary(tagDictionaryPath);
  createAutomaticDictionary(getDictSamples(), getDictCutOff());
}
Example #17
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the "Folds" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured number of folds parsed as an integer, or
 *         {@code Flags.DEFAULT_FOLDS_VALUE} when the setting is absent
 * @throws NumberFormatException
 *           if the configured value is not a parsable integer
 */
public static Integer getFolds(final TrainingParameters params) {
  final String configured = params.getSettings().get("Folds");
  if (configured != null) {
    return Integer.parseInt(configured);
  }
  return Flags.DEFAULT_FOLDS_VALUE;
}
Example #18
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the "NgramDictFeatures" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value parsed as an integer, or
 *         {@code Flags.DEFAULT_DICT_CUTOFF} when the setting is absent
 * @throws NumberFormatException
 *           if the configured value is not a parsable integer
 */
public static Integer getNgramDictFeatures(final TrainingParameters params) {
  final String configured = params.getSettings().get("NgramDictFeatures");
  final String cutoff;
  if (configured == null) {
    cutoff = Integer.toString(Flags.DEFAULT_DICT_CUTOFF);
  } else {
    cutoff = configured;
  }
  return Integer.parseInt(cutoff);
}
Example #19
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the "AutoDictFeatures" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value parsed as an integer, or
 *         {@code Flags.DEFAULT_DICT_CUTOFF} when the setting is absent
 * @throws NumberFormatException
 *           if the configured value is not a parsable integer
 */
public static Integer getAutoDictFeatures(final TrainingParameters params) {
  final String configured = params.getSettings().get("AutoDictFeatures");
  final String cutoff;
  if (configured == null) {
    cutoff = Integer.toString(Flags.DEFAULT_DICT_CUTOFF);
  } else {
    cutoff = configured;
  }
  return Integer.parseInt(cutoff);
}
Example #20
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the "DictionaryFeatures" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value, or {@code Flags.DEFAULT_DICT_PATH} when the setting
 *         is absent
 */
public static String getDictionaryFeatures(final TrainingParameters params) {
  final String configured = params.getSettings().get("DictionaryFeatures");
  if (configured == null) {
    return Flags.DEFAULT_DICT_PATH;
  }
  return configured;
}
Example #21
Source File: OpenNlpNerRecommenderTraits.java From inception with Apache License 2.0 | 5 votes |
/**
 * Builds the OpenNLP training parameters for these traits: starts from the OpenNLP
 * defaults, sets the verbose parameter to "false" and applies the configured thread
 * count.
 *
 * @return a newly created parameter set
 */
@JsonIgnore
public TrainingParameters getParameters()
{
    TrainingParameters trainingParams = TrainingParameters.defaultParams();

    trainingParams.put(AbstractTrainer.VERBOSE_PARAM, "false");
    trainingParams.put(TrainingParameters.THREADS_PARAM, numThreads);

    return trainingParams;
}
Example #22
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the "BeamSize" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured beam size parsed as an integer, or
 *         {@code Flags.DEFAULT_BEAM_SIZE} when the setting is absent
 * @throws NumberFormatException
 *           if the configured value is not a parsable integer
 */
public static Integer getBeamsize(final TrainingParameters params) {
  final String configured = params.getSettings().get("BeamSize");
  if (configured != null) {
    return Integer.parseInt(configured);
  }
  return Flags.DEFAULT_BEAM_SIZE;
}
Example #23
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the mandatory "CorpusFormat" setting. When it is absent,
 * {@code corpusFormatException()} is invoked (defined elsewhere; presumably it
 * reports the missing setting - verify) and, if that call returns, {@code null}
 * is returned.
 *
 * @param params
 *          the training parameters
 * @return the configured corpus format, or {@code null} when it is absent
 */
public static String getCorpusFormat(final TrainingParameters params) {
  final String configured = params.getSettings().get("CorpusFormat");
  if (configured == null) {
    corpusFormatException();
  }
  return configured;
}
Example #24
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the mandatory "OutputModel" setting. When it is absent or empty,
 * {@code modelException()} is invoked (defined elsewhere; presumably it reports the
 * missing setting - verify) and, if that call returns, {@code null} is returned.
 *
 * @param params
 *          the training parameters
 * @return the configured output model, or {@code null} when it is absent or empty
 */
public static String getModel(final TrainingParameters params) {
  final String configured = params.getSettings().get("OutputModel");
  // Missing and empty values are handled the same way.
  if (configured == null || configured.length() == 0) {
    modelException();
    return null;
  }
  return configured;
}
Example #25
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the mandatory setting named by {@code dataset}. When it is absent,
 * {@code datasetException()} is invoked (defined elsewhere; presumably it reports
 * the missing setting - verify) and, if that call returns, {@code null} is
 * returned.
 *
 * @param dataset
 *          the name of the setting to read, e.g. "TrainSet" or "TestSet"
 * @param params
 *          the training parameters
 * @return the configured value, or {@code null} when it is absent
 */
public static String getDataSet(final String dataset,
    final TrainingParameters params) {
  final String configured = params.getSettings().get(dataset);
  if (configured == null) {
    datasetException();
  }
  return configured;
}
Example #26
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the mandatory "Language" setting. When it is absent,
 * {@code langException()} is invoked (defined elsewhere; presumably it reports the
 * missing setting - verify) and, if that call returns, {@code null} is returned.
 *
 * @param params
 *          the training parameters
 * @return the configured language, or {@code null} when it is absent
 */
public static String getLanguage(final TrainingParameters params) {
  final String configured = params.getSettings().get("Language");
  if (configured == null) {
    langException();
  }
  return configured;
}
Example #27
Source File: Flags.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Reads the mandatory "Component" setting. When it is absent,
 * {@code componentException()} is invoked (defined elsewhere; presumably it reports
 * the missing setting - verify) and, if that call returns, {@code null} is
 * returned.
 *
 * @param params
 *          the training parameters
 * @return the configured component, or {@code null} when it is absent
 */
public static String getComponent(final TrainingParameters params) {
  final String configured = params.getSettings().get("Component");
  if (configured == null) {
    componentException();
  }
  return configured;
}
Example #28
Source File: CLI.java From ixa-pipe-pos with Apache License 2.0 | 5 votes |
/**
 * Entry point for cross validation: loads the training parameters from the file
 * named by the "params" argument, builds a {@code POSCrossValidator} from them and
 * runs it.
 *
 * @throws IOException
 *           input output exception if problems with corpora
 */
public final void crossValidate() throws IOException {
  final TrainingParameters trainingParams = InputOutputUtils
      .loadTrainingParameters(this.parsedArguments.getString("params"));
  new POSCrossValidator(trainingParams).crossValidate(trainingParams);
}
Example #29
Source File: OpenNlpNerRecommender.java From inception with Apache License 2.0 | 5 votes |
/**
 * Trains a name finder model from the given samples using a default
 * {@code TokenNameFinderFactory}, the language code {@code "unknown"} and no
 * explicit entity type.
 *
 * @param aNameSamples
 *            the training samples
 * @param aParameters
 *            the training parameters
 * @return the trained model
 * @throws RecommendationException
 *             if an I/O error occurs during training
 */
private TokenNameFinderModel train(List<NameSample> aNameSamples,
        TrainingParameters aParameters)
    throws RecommendationException
{
    try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
        TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
        return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
    }
    catch (IOException e) {
        LOG.error("Exception during training the OpenNLP Named Entity Recognizer model.", e);
        // The message used to say "pos", a copy-paste leftover; this trains the
        // named entity recognizer.
        throw new RecommendationException(
                "Error while training OpenNLP named entity recognizer", e);
    }
}
Example #30
Source File: OpenNlpPosRecommenderTraits.java From inception with Apache License 2.0 | 5 votes |
/**
 * Builds the OpenNLP training parameters for these traits: starts from the OpenNLP
 * defaults, sets the verbose parameter to "false" and applies the configured thread
 * count.
 *
 * @return a newly created parameter set
 */
@JsonIgnore
public TrainingParameters getParameters()
{
    TrainingParameters trainingParams = TrainingParameters.defaultParams();

    trainingParams.put(AbstractTrainer.VERBOSE_PARAM, "false");
    trainingParams.put(TrainingParameters.THREADS_PARAM, numThreads);

    return trainingParams;
}