opennlp.tools.util.TrainingParameters Java Examples

The following examples show how to use opennlp.tools.util.TrainingParameters. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OpenNlpDoccatRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a document categorizer model from the given CASes and stores it in the context
 * under {@code KEY_MODEL}. Nothing is stored when fewer than two samples are available.
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<DocumentSample> trainingDocs = extractSamples(aCasses);

    if (trainingDocs.size() >= 2) {
        // The beam size bounds the number of results returned. Even when only a few
        // recommendations are requested, never go below the default beam size that
        // OpenNLP defines.
        int effectiveBeamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE);

        TrainingParameters trainingParams = traits.getParameters();
        trainingParams.put(BeamSearch.BEAM_SIZE_PARAMETER, String.valueOf(effectiveBeamSize));

        DoccatModel trainedModel = train(trainingDocs, trainingParams);
        aContext.put(KEY_MODEL, trainedModel);
    }
    else {
        LOG.info("Not enough training data: [{}] items", trainingDocs.size());
    }
}
 
Example #2
Source File: LanguageDetectorAndTrainingDataUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Trains a Naive Bayes language detector from the sample file and checks the predicted
 * languages and confidences for a Portuguese sentence.
 */
@Test
public void givenLanguageDictionary_whenLanguageDetect_thenLanguageIsDetected() throws FileNotFoundException, IOException {
    InputStreamFactory dataIn = new MarkableFileInputStreamFactory(new File("src/main/resources/models/DoccatSample.txt"));

    TrainingParameters params = new TrainingParameters();
    params.put(TrainingParameters.ITERATIONS_PARAM, 100);
    params.put(TrainingParameters.CUTOFF_PARAM, 5);
    params.put("DataIndexer", "TwoPass");
    params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

    // The streams hold an open file handle; close them as soon as training is done.
    // The line stream is typed instead of using the raw ObjectStream type.
    LanguageDetectorModel model;
    try (ObjectStream<String> lineStream = new PlainTextByLineStream(dataIn, "UTF-8");
            LanguageDetectorSampleStream sampleStream = new LanguageDetectorSampleStream(lineStream)) {
        model = LanguageDetectorME.train(sampleStream, params, new LanguageDetectorFactory());
    }

    LanguageDetector ld = new LanguageDetectorME(model);
    Language[] languages = ld.predictLanguages("estava em uma marcenaria na Rua Bruno");

    assertThat(Arrays.asList(languages)).extracting("lang", "confidence").contains(tuple("pob", 0.9999999950605625),
             tuple("ita", 4.939427661577956E-9), tuple("spa", 9.665954064665144E-15),
            tuple("fra", 8.250349924885834E-25));
}
 
Example #3
Source File: AbstractTaggerTrainer.java    From ixa-pipe-pos with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a POS model on the training samples, evaluates it on the test samples and
 * prints the resulting word accuracy to standard output.
 *
 * @param params
 *          the training parameters
 * @return the trained model
 * @throws IllegalStateException
 *           if no POSTaggerFactory has been set
 */
public final POSModel train(final TrainingParameters params) {
  // a tagger factory must be available before training can start
  if (getPosTaggerFactory() == null) {
    throw new IllegalStateException(
        "Classes derived from AbstractTrainer must "
            + " create a POSTaggerFactory features!");
  }
  POSModel model = null;
  POSEvaluator evaluator = null;
  try {
    // train, then evaluate the fresh model on the held-out samples
    model = POSTaggerME.train(this.lang, this.trainSamples, params,
        getPosTaggerFactory());
    evaluator = new POSEvaluator(new POSTaggerME(model));
    evaluator.evaluate(this.testSamples);
  } catch (final IOException ioe) {
    // an I/O failure terminates the JVM
    System.err.println("IO error while loading training and test sets!");
    ioe.printStackTrace();
    System.exit(1);
  }
  System.out.println("Final result: " + evaluator.getWordAccuracy());
  return model;
}
 
Example #4
Source File: AbstractTaggerTrainer.java    From ixa-pipe-pos with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the trainer from the given parameters: reads the language, opens the
 * "TrainSet" and "TestSet" data sets as sample streams, opens the training set a
 * second time for the dictionary samples, and reads the dictionary and ngram
 * cutoffs.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           if a data set cannot be read
 */
public AbstractTaggerTrainer(final TrainingParameters params) throws IOException {
  this.lang = Flags.getLanguage(params);
  final String trainPath = Flags.getDataSet("TrainSet", params);
  final String testPath = Flags.getDataSet("TestSet", params);

  final ObjectStream<String> trainLines = InputOutputUtils
      .readFileIntoMarkableStreamFactory(trainPath);
  this.trainSamples = new MorphoSampleStream(trainLines);

  final ObjectStream<String> testLines = InputOutputUtils
      .readFileIntoMarkableStreamFactory(testPath);
  this.testSamples = new MorphoSampleStream(testLines);

  // the dictionary samples come from a separate stream over the training file
  final ObjectStream<String> dictLines = InputOutputUtils
      .readFileIntoMarkableStreamFactory(trainPath);
  setDictSamples(new MorphoSampleStream(dictLines));

  this.dictCutOff = Flags.getAutoDictFeatures(params);
  this.ngramCutOff = Flags.getNgramDictFeatures(params);
}
 
Example #5
Source File: AbstractLemmatizerTrainer.java    From ixa-pipe-pos with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a lemmatizer model on the training samples, evaluates it on the test samples
 * and prints the resulting word accuracy to standard output.
 *
 * @param params
 *          the training parameters
 * @return the trained model
 * @throws IllegalStateException
 *           if no LemmatizerFactory has been set
 */
public final LemmatizerModel train(final TrainingParameters params) {
  // a lemmatizer factory must be available before training can start
  if (getLemmatizerFactory() == null) {
    throw new IllegalStateException(
        "Classes derived from AbstractLemmatizerTrainer must "
            + " create a LemmatizerFactory features!");
  }
  LemmatizerModel model = null;
  LemmatizerEvaluator evaluator = null;
  try {
    // train, then evaluate the fresh model on the held-out samples
    model = LemmatizerME.train(this.lang, this.trainSamples, params,
        getLemmatizerFactory());
    evaluator = new LemmatizerEvaluator(new LemmatizerME(model));
    evaluator.evaluate(this.testSamples);
  } catch (final IOException ioe) {
    // an I/O failure terminates the JVM
    System.err.println("IO error while loading training and test sets!");
    ioe.printStackTrace();
    System.exit(1);
  }
  System.out.println("Final result: " + evaluator.getWordAccuracy());
  return model;
}
 
Example #6
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a name finder model from the given CASes and stores it in the context under
 * {@code KEY_MODEL}. Nothing is stored when fewer than two samples are available.
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<NameSample> trainingSamples = extractNameSamples(aCasses);

    if (trainingSamples.size() >= 2) {
        // The beam size bounds the number of results returned. Even when only a few
        // recommendations are requested, never go below the default beam size that
        // OpenNLP defines.
        int effectiveBeamSize = Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE);

        TrainingParameters trainingParams = traits.getParameters();
        trainingParams.put(BeamSearch.BEAM_SIZE_PARAMETER, String.valueOf(effectiveBeamSize));

        TokenNameFinderModel trainedModel = train(trainingSamples, trainingParams);
        aContext.put(KEY_MODEL, trainedModel);
    }
    else {
        LOG.info("Not enough training data: [{}] items", trainingSamples.size());
    }
}
 
Example #7
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a POS tagger model from the given samples.
 *
 * @return the trained model, or {@code null} when there are no samples
 * @throws RecommendationException
 *             if an I/O error occurs during training
 */
@Nullable
private POSModel train(List<POSSample> aPosSamples, TrainingParameters aParameters)
    throws RecommendationException
{
    if (!aPosSamples.isEmpty()) {
        try (POSSampleStream sampleStream = new POSSampleStream(aPosSamples)) {
            POSTaggerFactory factory = new POSTaggerFactory();
            return POSTaggerME.train("unknown", sampleStream, aParameters, factory);
        }
        catch (IOException ioe) {
            throw new RecommendationException("Error training model", ioe);
        }
    }

    // no samples, no model
    return null;
}
 
Example #8
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a POS tagger model from the given CASes and stores it in the context under
 * {@code KEY_MODEL}. Nothing is stored when fewer than two samples are available.
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<POSSample> trainingSamples = extractPosSamples(aCasses);

    if (trainingSamples.size() >= 2) {
        // The beam size bounds the number of results returned. Even when only a few
        // recommendations are requested, never go below the default beam size that
        // OpenNLP defines.
        int effectiveBeamSize = Math.max(maxRecommendations, POSTaggerME.DEFAULT_BEAM_SIZE);

        TrainingParameters trainingParams = traits.getParameters();
        trainingParams.put(BeamSearch.BEAM_SIZE_PARAMETER, String.valueOf(effectiveBeamSize));

        POSModel trainedModel = train(trainingSamples, trainingParams);
        aContext.put(KEY_MODEL, trainedModel);
    }
    else {
        LOG.info("Not enough training data: [{}] items", trainingSamples.size());
    }
}
 
Example #9
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "FeatureSet" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value, or {@code Flags.DEFAULT_FEATURESET_FLAG} when the
 *         setting is absent
 */
public static String getFeatureSet(final TrainingParameters params) {
  final String configured = params.getSettings().get("FeatureSet");
  if (configured == null) {
    return Flags.DEFAULT_FEATURESET_FLAG;
  }
  return configured;
}
 
Example #10
Source File: OpenNlpDoccatRecommenderTraits.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the training parameters: OpenNLP's defaults with verbose output switched off
 * and the iterations, cutoff and thread count taken from these traits.
 *
 * @return a freshly created parameter set
 */
public TrainingParameters getParameters()
{
    TrainingParameters trainingParams = TrainingParameters.defaultParams();

    // start from the defaults and override the values held by these traits
    trainingParams.put(AbstractTrainer.VERBOSE_PARAM, false);
    trainingParams.put(TrainingParameters.ITERATIONS_PARAM, iterations);
    trainingParams.put(TrainingParameters.CUTOFF_PARAM, cutoff);
    trainingParams.put(TrainingParameters.THREADS_PARAM, numThreads);

    return trainingParams;
}
 
Example #11
Source File: POSCrossValidator.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the POS tagger cross validator. The tag dictionary file is passed on only
 * when its name differs from {@code Flags.DEFAULT_DICT_PATH}, and the dictionary
 * cutoff only when it differs from {@code Flags.DEFAULT_DICT_CUTOFF}; otherwise
 * {@code null} is passed in their place.
 *
 * @param params
 *          the training parameters
 * @return the pos tagger cross validator
 * @throws IllegalStateException
 *           if no POSTaggerFactory has been created
 */
private POSTaggerCrossValidator getPOSTaggerCrossValidator(
    final TrainingParameters params) {
  final File dictPath = new File(Flags.getDictionaryFeatures(params));
  // features
  if (this.posTaggerFactory == null) {
    throw new IllegalStateException(
        "You must create the POSTaggerFactory features!");
  }
  // default values mean "not configured" and are replaced by null
  final boolean usesDefaultDict = dictPath.getName().equals(
      Flags.DEFAULT_DICT_PATH);
  final boolean usesDefaultCutOff = this.dictCutOff == Flags.DEFAULT_DICT_CUTOFF;
  final File tagDictionary = usesDefaultDict ? null : dictPath;
  final Integer tagDictCutOff = usesDefaultCutOff ? null : this.dictCutOff;

  return new POSTaggerCrossValidator(this.lang, params, tagDictionary, null,
      tagDictCutOff, this.posTaggerFactory.getClass().getName(),
      this.listeners
          .toArray(new POSTaggerEvaluationMonitor[this.listeners.size()]));
}
 
Example #12
Source File: POSCrossValidator.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the evaluation listeners selected by the "EvaluationType" setting:
 * "error" adds an error listener, "detailed" adds a fine-grained report listener
 * (also kept in {@code detailedListener}). The comparison ignores case. When the
 * setting is absent or has any other value, no listener is added.
 *
 * @param params
 *          the training parameters
 */
private void getEvalListeners(final TrainingParameters params) {
  // Look the value up once and compare constant-first so that a missing
  // "EvaluationType" setting no longer causes a NullPointerException.
  final String evaluationType = params.getSettings().get("EvaluationType");
  if ("error".equalsIgnoreCase(evaluationType)) {
    this.listeners.add(new POSEvaluationErrorListener());
  } else if ("detailed".equalsIgnoreCase(evaluationType)) {
    this.detailedListener = new POSTaggerFineGrainedReportListener();
    this.listeners.add(this.detailedListener);
  }
}
 
Example #13
Source File: POSCrossValidator.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the POS tagger factory from the configured feature set: the stock
 * {@code POSTaggerFactory} for "Opennlp" (case-insensitive), otherwise a
 * {@code BaselineFactory}.
 *
 * @param params
 *          the training parameters
 */
private void createPOSFactory(final TrainingParameters params) {
  final boolean useOpennlpFeatures = Flags.getFeatureSet(params)
      .equalsIgnoreCase("Opennlp");
  if (!useOpennlpFeatures) {
    this.posTaggerFactory = new BaselineFactory();
    return;
  }
  this.posTaggerFactory = new POSTaggerFactory();
}
 
Example #14
Source File: POSCrossValidator.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the cross validator from the given parameters: reads the language, opens
 * the "TrainSet" data set as a sample stream, reads the dictionary cutoff and the
 * number of folds, then sets up the tagger factory and the evaluation listeners.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           if the training set cannot be read
 */
public POSCrossValidator(final TrainingParameters params) throws IOException {
  this.lang = Flags.getLanguage(params);

  final String trainingFile = Flags.getDataSet("TrainSet", params);
  final ObjectStream<String> trainingLines = InputOutputUtils
      .readFileIntoMarkableStreamFactory(trainingFile);
  this.trainSamples = new WordTagSampleStream(trainingLines);

  this.dictCutOff = Flags.getAutoDictFeatures(params);
  this.folds = Flags.getFolds(params);

  createPOSFactory(params);
  getEvalListeners(params);
}
 
Example #15
Source File: OpenNlpDoccatRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Trains a document categorizer model from the given samples.
 *
 * @return the trained model
 * @throws RecommendationException
 *             if an I/O error occurs during training
 */
private DoccatModel train(List<DocumentSample> aSamples, TrainingParameters aParameters)
    throws RecommendationException
{
    try (DocumentSampleStream sampleStream = new DocumentSampleStream(aSamples)) {
        return DocumentCategorizerME.train("unknown", sampleStream, aParameters,
                new DoccatFactory());
    }
    catch (IOException ioe) {
        throw new RecommendationException(
                "Exception during training the OpenNLP Document Categorizer model.", ioe);
    }
}
 
Example #16
Source File: FixedTrainer.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a trainer on top of the base trainer: sets the POS tagger factory obtained
 * from {@code getTrainerFactory}, then creates the tag dictionary from the configured
 * dictionary path and the automatic dictionary from the dictionary samples.
 *
 * @param params
 *          the training parameters
 * @throws IOException
 *           the io exception
 */
public FixedTrainer(final TrainingParameters params) throws IOException {
  super(params);

  final String tagDictionaryPath = Flags.getDictionaryFeatures(params);
  // the factory is set before the dictionaries are created
  setPosTaggerFactory(getTrainerFactory(params));
  createTagDictionary(tagDictionaryPath);
  createAutomaticDictionary(getDictSamples(), getDictCutOff());
}
 
Example #17
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "Folds" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value parsed as an integer, or
 *         {@code Flags.DEFAULT_FOLDS_VALUE} when the setting is absent
 */
public static Integer getFolds(final TrainingParameters params) {
  final String configured = params.getSettings().get("Folds");
  if (configured != null) {
    return Integer.parseInt(configured);
  }
  return Flags.DEFAULT_FOLDS_VALUE;
}
 
Example #18
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "NgramDictFeatures" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value parsed as an integer, or the value of
 *         {@code Flags.DEFAULT_DICT_CUTOFF} when the setting is absent
 */
public static Integer getNgramDictFeatures(final TrainingParameters params) {
  final String configured = params.getSettings().get("NgramDictFeatures");
  final String cutoffText;
  if (configured == null) {
    cutoffText = Integer.toString(Flags.DEFAULT_DICT_CUTOFF);
  } else {
    cutoffText = configured;
  }
  return Integer.parseInt(cutoffText);
}
 
Example #19
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "AutoDictFeatures" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value parsed as an integer, or the value of
 *         {@code Flags.DEFAULT_DICT_CUTOFF} when the setting is absent
 */
public static Integer getAutoDictFeatures(final TrainingParameters params) {
  final String configured = params.getSettings().get("AutoDictFeatures");
  final String cutoffText;
  if (configured == null) {
    cutoffText = Integer.toString(Flags.DEFAULT_DICT_CUTOFF);
  } else {
    cutoffText = configured;
  }
  return Integer.parseInt(cutoffText);
}
 
Example #20
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "DictionaryFeatures" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value, or {@code Flags.DEFAULT_DICT_PATH} when the setting
 *         is absent
 */
public static String getDictionaryFeatures(final TrainingParameters params) {
  final String configured = params.getSettings().get("DictionaryFeatures");
  if (configured == null) {
    return Flags.DEFAULT_DICT_PATH;
  }
  return configured;
}
 
Example #21
Source File: OpenNlpNerRecommenderTraits.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the training parameters: OpenNLP's defaults with the verbose setting set to
 * "false" and the thread count taken from these traits.
 *
 * @return a freshly created parameter set
 */
@JsonIgnore
public TrainingParameters getParameters()
{
    TrainingParameters trainingParams = TrainingParameters.defaultParams();

    // start from the defaults and override the values held by these traits
    trainingParams.put(AbstractTrainer.VERBOSE_PARAM, "false");
    trainingParams.put(TrainingParameters.THREADS_PARAM, numThreads);

    return trainingParams;
}
 
Example #22
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "BeamSize" setting.
 *
 * @param params
 *          the training parameters
 * @return the configured value parsed as an integer, or
 *         {@code Flags.DEFAULT_BEAM_SIZE} when the setting is absent
 */
public static Integer getBeamsize(final TrainingParameters params) {
  final String configured = params.getSettings().get("BeamSize");
  if (configured != null) {
    return Integer.parseInt(configured);
  }
  return Flags.DEFAULT_BEAM_SIZE;
}
 
Example #23
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "CorpusFormat" setting. When it is absent, {@code corpusFormatException()}
 * is invoked; if that call returns normally, {@code null} is returned.
 *
 * @param params
 *          the training parameters
 * @return the configured corpus format
 */
public static String getCorpusFormat(final TrainingParameters params) {
  final String configured = params.getSettings().get("CorpusFormat");
  if (configured == null) {
    corpusFormatException();
    return null;
  }
  return configured;
}
 
Example #24
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "OutputModel" setting. When it is absent or empty,
 * {@code modelException()} is invoked; if that call returns normally, {@code null}
 * is returned.
 *
 * @param params
 *          the training parameters
 * @return the configured output model name
 */
public static String getModel(final TrainingParameters params) {
  final String configured = params.getSettings().get("OutputModel");
  // a missing and an empty value are treated alike
  if (configured == null || configured.length() == 0) {
    modelException();
    return null;
  }
  return configured;
}
 
Example #25
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the setting stored under the given data set key. When it is absent,
 * {@code datasetException()} is invoked; if that call returns normally, {@code null}
 * is returned.
 *
 * @param dataset
 *          the settings key of the data set
 * @param params
 *          the training parameters
 * @return the configured value for the key
 */
public static String getDataSet(final String dataset,
    final TrainingParameters params) {
  final String configured = params.getSettings().get(dataset);
  if (configured == null) {
    datasetException();
    return null;
  }
  return configured;
}
 
Example #26
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "Language" setting. When it is absent, {@code langException()} is
 * invoked; if that call returns normally, {@code null} is returned.
 *
 * @param params
 *          the training parameters
 * @return the configured language
 */
public static String getLanguage(final TrainingParameters params) {
  final String configured = params.getSettings().get("Language");
  if (configured == null) {
    langException();
    return null;
  }
  return configured;
}
 
Example #27
Source File: Flags.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "Component" setting. When it is absent, {@code componentException()} is
 * invoked; if that call returns normally, {@code null} is returned.
 *
 * @param params
 *          the training parameters
 * @return the configured component
 */
public static String getComponent(final TrainingParameters params) {
  final String configured = params.getSettings().get("Component");
  if (configured == null) {
    componentException();
    return null;
  }
  return configured;
}
 
Example #28
Source File: CLI.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Entry point for cross validation: loads the training parameters from the file named
 * by the "params" argument, builds a cross validator from them and runs it.
 *
 * @throws IOException
 *           input output exception if problems with corpora
 */
public final void crossValidate() throws IOException {
  final TrainingParameters params = InputOutputUtils
      .loadTrainingParameters(this.parsedArguments.getString("params"));
  // the same parameters configure the validator and drive the run
  new POSCrossValidator(params).crossValidate(params);
}
 
Example #29
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Trains a name finder model from the given samples.
 *
 * @return the trained model
 * @throws RecommendationException
 *             if an I/O error occurs during training
 */
private TokenNameFinderModel train(List<NameSample> aNameSamples,
        TrainingParameters aParameters)
    throws RecommendationException
{
    try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
        TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
        return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
    } catch (IOException e) {
        LOG.error("Exception during training the OpenNLP Named Entity Recognizer model.", e);
        // The message previously said "OpenNLP pos", which mislabelled failures of this
        // named entity trainer.
        throw new RecommendationException(
                "Error while training OpenNLP Named Entity Recognizer model", e);
    }
}
 
Example #30
Source File: OpenNlpPosRecommenderTraits.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the training parameters: OpenNLP's defaults with the verbose setting set to
 * "false" and the thread count taken from these traits.
 *
 * @return a freshly created parameter set
 */
@JsonIgnore
public TrainingParameters getParameters()
{
    TrainingParameters trainingParams = TrainingParameters.defaultParams();

    // start from the defaults and override the values held by these traits
    trainingParams.put(AbstractTrainer.VERBOSE_PARAM, "false");
    trainingParams.put(TrainingParameters.THREADS_PARAM, numThreads);

    return trainingParams;
}