opennlp.tools.doccat.DoccatModel Java Examples

The following examples show how to use opennlp.tools.doccat.DoccatModel. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: Chapter6.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License

6 votes

/**
 * Loads the document categorizer model from the file {@code en-animal.model},
 * categorizes {@code inputText} with it, and prints the results to standard output:
 * one {@code category - score} line per category, then the best category, then the
 * value returned by {@code getAllResults}.
 *
 * An {@link IOException} raised inside the try-with-resources statement is not
 * propagated; only its stack trace is printed.
 */
private static void usingOpenNLP() {
    try (InputStream modelStream = new FileInputStream("en-animal.model")) {
        DocumentCategorizerME classifier =
                new DocumentCategorizerME(new DoccatModel(modelStream));
        double[] scores = classifier.categorize(inputText);

        // scores[k] is printed next to the category reported for index k.
        int index = 0;
        while (index < classifier.getNumberOfCategories()) {
            System.out.println(classifier.getCategory(index) + " - " + scores[index]);
            index++;
        }

        System.out.println(classifier.getBestCategory(scores));
        System.out.println(classifier.getAllResults(scores));
    } catch (IOException ioFailure) {
        ioFailure.printStackTrace();
    }
}

Example #2

Source File: OpenNlpDoccatRecommender.java From inception with Apache License 2.0

6 votes

/**
 * Trains a document categorizer model from the samples extracted from the given CASes
 * and stores it in the context under {@code KEY_MODEL}.
 *
 * If fewer than two samples are extracted, this is logged and the method returns
 * without training or touching the context.
 *
 * @param aContext
 *            the context that receives the trained model.
 * @param aCasses
 *            the CASes from which the training samples are extracted.
 * @throws RecommendationException
 *             declared by this method; see the sample extraction and training helpers
 *             it calls.
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<DocumentSample> samples = extractSamples(aCasses);

    int sampleCount = samples.size();
    if (sampleCount < 2) {
        LOG.info("Not enough training data: [{}] items", sampleCount);
        return;
    }

    // The beam size caps how many results can be returned. Even when only a few results
    // are requested, never go below the default beam size recommended by OpenNLP.
    TrainingParameters trainingParams = traits.getParameters();
    trainingParams.put(BeamSearch.BEAM_SIZE_PARAMETER,
            Integer.toString(Math.max(maxRecommendations, NameFinderME.DEFAULT_BEAM_SIZE)));

    aContext.put(KEY_MODEL, train(samples, trainingParams));
}

Example #3

Source File: OpenNlpDoccatRecommender.java From inception with Apache License 2.0

5 votes

/**
 * Categorizes sentences of the given CAS with the model stored in the context and adds
 * one prediction annotation per processed sentence.
 *
 * Each annotation spans its sentence and carries the best category as label, the
 * maximum outcome value as score, and the is-prediction flag set to {@code true}.
 * Processing stops once {@code traits.getPredictionLimit()} sentences have been handled.
 *
 * @param aContext
 *            the context expected to hold the trained model under {@code KEY_MODEL}.
 * @param aCas
 *            the CAS whose sentences are categorized and which receives the annotations.
 * @throws RecommendationException
 *             if the context contains no model under {@code KEY_MODEL}.
 */
@Override
public void predict(RecommenderContext aContext, CAS aCas) throws RecommendationException
{
    DoccatModel model = aContext.get(KEY_MODEL)
            .orElseThrow(() -> new RecommendationException(
                    "Key [" + KEY_MODEL + "] not found in context"));

    DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

    // Resolve all types and features once, before iterating over the sentences.
    Type sentenceType = getType(aCas, Sentence.class);
    Type predictedType = getPredictedType(aCas);
    Type tokenType = getType(aCas, Token.class);
    Feature scoreFeature = getScoreFeature(aCas);
    Feature predictedFeature = getPredictedFeature(aCas);
    Feature isPredictionFeature = getIsPredictionFeature(aCas);

    int processedSentences = 0;
    for (AnnotationFS sentence : select(aCas, sentenceType)) {
        if (processedSentences >= traits.getPredictionLimit()) {
            break;
        }
        processedSentences++;

        // Collect the covered text of every token inside the sentence.
        List<AnnotationFS> coveredTokens = selectCovered(tokenType, sentence);
        String[] tokenTexts = new String[coveredTokens.size()];
        int slot = 0;
        for (AnnotationFS token : coveredTokens) {
            tokenTexts[slot++] = token.getCoveredText();
        }

        double[] scores = categorizer.categorize(tokenTexts);
        String bestLabel = categorizer.getBestCategory(scores);

        AnnotationFS prediction = aCas.createAnnotation(predictedType, sentence.getBegin(),
                sentence.getEnd());
        prediction.setStringValue(predictedFeature, bestLabel);
        prediction.setDoubleValue(scoreFeature, NumberUtils.max(scores));
        prediction.setBooleanValue(isPredictionFeature, true);
        aCas.addFsToIndexes(prediction);
    }
}

Example #4

Source File: OpenNlpDoccatRecommender.java From inception with Apache License 2.0

5 votes

/**
 * Trains an OpenNLP document categorizer model from the given samples.
 *
 * The samples are wrapped in a {@code DocumentSampleStream} that is closed when
 * training finishes; {@code "unknown"} is passed as the first argument of the trainer
 * together with a fresh {@code DoccatFactory}.
 *
 * @param aSamples
 *            the training samples.
 * @param aParameters
 *            the training parameters handed to the trainer.
 * @return the trained model.
 * @throws RecommendationException
 *             if an {@link IOException} occurs inside the try-with-resources statement.
 */
private DoccatModel train(List<DocumentSample> aSamples, TrainingParameters aParameters)
    throws RecommendationException
{
    try (DocumentSampleStream sampleStream = new DocumentSampleStream(aSamples)) {
        DoccatModel trainedModel = DocumentCategorizerME.train("unknown", sampleStream,
                aParameters, new DoccatFactory());
        return trainedModel;
    }
    catch (IOException ioFailure) {
        throw new RecommendationException(
                "Exception during training the OpenNLP Document Categorizer model.",
                ioFailure);
    }
}

Example #5

Source File: OpenNLPIntentMatcher.java From Mutters with Apache License 2.0

5 votes

/**
 * Creates an intent matcher backed by a document categoriser model that is loaded
 * from the given URL. The tokenizer, slot matcher and both scores are handed on to
 * the superclass constructor unchanged.
 * 
 * @param intentModelUrl  A URL pointing at the document categoriser model file
 *                        to load.
 * @param tokenizer       The tokenizer to use when tokenizing an utterance.
 * @param slotMatcher     The slot matcher to use to extract slots from the
 *                        utterance.
 * @param minMatchScore   The minimum match score for an intent match to be
 *                        considered good.
 * @param maybeMatchScore The maybe match score. Use -1 to disable maybe
 *                        matching.
 * @throws IllegalArgumentException If loading the model from the URL fails for
 *                                  any reason; the original exception is kept
 *                                  as the cause.
 */
public OpenNLPIntentMatcher(URL intentModelUrl, Tokenizer tokenizer, SlotMatcher slotMatcher, float minMatchScore,
    float maybeMatchScore)
{
  super(tokenizer, slotMatcher, minMatchScore, maybeMatchScore);

  // Any failure while loading is reported to the caller as a bad argument.
  try
  {
    model = new DoccatModel(intentModelUrl);
  }
  catch (Exception loadFailure)
  {
    throw new IllegalArgumentException("Unable to load intent model", loadFailure);
  }
}

Example #6

Source File: TestCategorization.java From Mutters with Apache License 2.0

5 votes

/**
 * Checks that the taxi intents model resource can be located through the context
 * class loader and that a {@code DoccatModel} can be constructed from it.
 */
@Test
public void testModelLoad() throws Exception
{
  ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
  URL modelResource = contextLoader.getResource("models/en-cat-taxi-intents.bin");
  assertThat(modelResource, is(notNullValue()));

  DoccatModel loadedModel = new DoccatModel(modelResource);
  assertThat(loadedModel, is(notNullValue()));
}

Example #7

Source File: TestCategorization.java From Mutters with Apache License 2.0

5 votes

/**
 * Loads the taxi intents model and checks that a series of utterances is categorised
 * into the expected intents.
 */
@Test
public void testCategorization() throws Exception
{
  ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
  URL modelResource = contextLoader.getResource("models/en-cat-taxi-intents.bin");
  assertThat(modelResource, is(notNullValue()));

  DoccatModel loadedModel = new DoccatModel(modelResource);
  assertThat(loadedModel, is(notNullValue()));

  DocumentCategorizerME categorizer = new DocumentCategorizerME(loadedModel);
  // model was built with OpenNLP whitespace tokenizer
  OpenNLPTokenizer whitespaceTokenizer = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);

  // Each row is { utterance, expected intent }; rows are checked in this order.
  String[][] expectations = {
      { "Order me a taxi", "OrderTaxi" },
      { "Send me a taxi", "OrderTaxi" },
      { "Send a taxi to 12 Pleasent Street", "OrderTaxi" },
      { "Cancel my cab", "CancelTaxi" },
      { "Where is my taxi ?", "WhereTaxi" },
      { "The address is 136 River Road", "GaveAddress" }
  };

  for (String[] expectation : expectations)
  {
    String utterance = expectation[0];
    String expectedIntent = expectation[1];

    String actualIntent = categorizer
        .getBestCategory(categorizer.categorize(whitespaceTokenizer.tokenize(utterance)));
    assertThat(actualIntent, is(notNullValue()));
    assertThat(actualIntent, is(expectedIntent));
  }
}