Example #1
/**
 * Trains a language-detector model from a sample file and verifies the languages predicted
 * for a fixed sentence.
 *
 * <p>Steps: read {@code src/main/resources/models/DoccatSample.txt} line by line as UTF-8,
 * wrap the lines in a {@code LanguageDetectorSampleStream}, train with the parameters set
 * below, then assert that the predictions contain the expected (lang, confidence) pairs.
 *
 * <p>NOTE(review): neither {@code lineStream} nor {@code sampleStream} is closed in the
 * visible code — confirm whether they should be managed with try-with-resources.
 *
 * @throws FileNotFoundException declared by the signature; presumably raised when the
 *         training file is missing — confirm against the stream factory
 * @throws IOException declared by the signature; presumably raised on read or training
 *         I/O failures — confirm
 */
public void givenLanguageDictionary_whenLanguageDetect_thenLanguageIsDetected() throws FileNotFoundException, IOException {
    // Training data comes from a fixed project-relative path and is read as UTF-8 lines.
    InputStreamFactory dataIn = new MarkableFileInputStreamFactory(new File("src/main/resources/models/DoccatSample.txt"));
    // NOTE(review): raw ObjectStream type; looks like it should be ObjectStream<String> — confirm.
    ObjectStream lineStream = new PlainTextByLineStream(dataIn, "UTF-8");
    LanguageDetectorSampleStream sampleStream = new LanguageDetectorSampleStream(lineStream);
    // Training configuration: 100 iterations, cutoff of 5, "TwoPass" data indexer, "NAIVEBAYES" algorithm.
    TrainingParameters params = new TrainingParameters();
    params.put(TrainingParameters.ITERATIONS_PARAM, 100);
    params.put(TrainingParameters.CUTOFF_PARAM, 5);
    params.put("DataIndexer", "TwoPass");
    params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");

    // Train the model from the sample stream using a default LanguageDetectorFactory.
    LanguageDetectorModel model = LanguageDetectorME.train(sampleStream, params, new LanguageDetectorFactory());

    // Run the trained model on a fixed sentence and check the (lang, confidence) pairs it reports.
    LanguageDetector ld = new LanguageDetectorME(model);
    Language[] languages = ld.predictLanguages("estava em uma marcenaria na Rua Bruno");
    // NOTE(review): the expected confidences are exact double literals; presumably they depend on
    // the exact training data and library version — consider a tolerance-based comparison.
    assertThat(Arrays.asList(languages)).extracting("lang", "confidence").contains(tuple("pob", 0.9999999950605625),
             tuple("ita", 4.939427661577956E-9), tuple("spa", 9.665954064665144E-15),
            tuple("fra", 8.250349924885834E-25));
Example #2
private static void trainingOpenNLPNERModel() {
    try (OutputStream modelOutputStream = new BufferedOutputStream(
            new FileOutputStream(new File("modelFile")));) {
        ObjectStream<String> lineStream = new PlainTextByLineStream(
                new FileInputStream("en-ner-person.train"), "UTF-8");
        ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);

        TokenNameFinderModel model = NameFinderME.train("en", "person", sampleStream,
                null, 100, 5);

    } catch (IOException ex) {