Java Code Examples for opennlp.tools.postag.POSTaggerME#tag()

The following examples show how to use opennlp.tools.postag.POSTaggerME#tag(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Chapter1.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Tags a fixed sample sentence with an OpenNLP POS model and prints the
 * result twice: first as "token - tag" pairs read back through a
 * {@code POSSample}, then as "token[tag]" pairs from the raw arrays.
 *
 * <p>NOTE(review): the model location is a hard-coded absolute path; adjust
 * it to wherever {@code en-pos-maxent.bin} lives on the local machine.
 */
private static void detectingPartsOfSpeechExample() {
    String sentence = "POS processing is useful for enhancing the "
            + "quality of data sent to other elements of a pipeline.";

    POSModel model = new POSModelLoader()
            .load(new File("C:/Current Books/NLP and Java/Models/", "en-pos-maxent.bin"));
    POSTaggerME tagger = new POSTaggerME(model);

    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(sentence);
    String[] tags = tagger.tag(tokens);

    POSSample sample = new POSSample(tokens, tags);
    String[] posTokens = sample.getSentence();
    String[] posTags = sample.getTags();
    for (int i = 0; i < posTokens.length; i++) {
        // Trailing space keeps consecutive "token - tag" pairs from running together.
        System.out.print(posTokens[i] + " - " + posTags[i] + " ");
    }
    System.out.println();

    for (int i = 0; i < tokens.length; i++) {
        System.out.print(tokens[i] + "[" + tags[i] + "] ");
    }
}
 
Example 2
Source File: OpenNLPAnnotator.java    From Stargraph with MIT License 6 votes vote down vote up
/**
 * Tokenizes the sentence, POS-tags the tokens and pairs every token with the
 * part of speech that the language's {@code PartOfSpeechSet} resolves for
 * its tag.
 *
 * @param language language used to select the tokenizer model, POS model and POS set
 * @param sentence text to annotate
 * @return one {@code Word} per token, in token order
 */
@Override
public List<Word> doRun(Language language, String sentence) {
    Tokenizer tokenizer = new TokenizerME(getTokenizerModel(language));
    POSTaggerME posTagger = new POSTaggerME(getPOSModel(language));

    String[] tokenTexts = tokenizer.tokenize(sentence);
    String[] posTags = posTagger.tag(tokenTexts);
    PartOfSpeechSet tagSet = PartOfSpeechSet.getPOSSet(language);

    List<Word> result = new ArrayList<>();
    int index = 0;
    for (String tokenText : tokenTexts) {
        // Tags are positionally aligned with the tokens they were computed from.
        result.add(new Word(tagSet.valueOf(posTags[index]), tokenText));
        index++;
    }
    return result;
}
 
Example 3
Source File: LemmetizerUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Tokenizes a sample sentence, POS-tags it, runs the dictionary lemmatizer
 * over the tokens and tags, and checks that the expected lemmas are present.
 *
 * @throws Exception if a model or dictionary resource cannot be loaded
 */
@Test
public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    // try-with-resources closes both classpath streams even if loading or tagging fails.
    try (InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
         InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict")) {
        POSModel posModel = new POSModel(inputStreamPOSTagger);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        String[] tags = posTagger.tag(tokens);

        DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
        String[] lemmas = lemmatizer.lemmatize(tokens, tags);

        assertThat(lemmas).contains("O", "have", "a", "sister", "name", "O", "O");
    }
}
 
Example 4
Source File: ChunkerUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Tokenizes a sample sentence, POS-tags it, feeds tokens and tags to the
 * chunker and checks that the expected chunk labels are present.
 *
 * @throws Exception if a model resource or file cannot be loaded
 */
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");

    // try-with-resources closes both model streams even if loading, tagging or chunking fails.
    try (InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
         InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin")) {
        POSModel posModel = new POSModel(inputStreamPOSTagger);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        String[] tags = posTagger.tag(tokens);

        ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
        ChunkerME chunker = new ChunkerME(chunkerModel);
        String[] chunks = chunker.chunk(tokens, tags);
        assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
    }
}
 
Example 5
Source File: NLPExamples.java    From Java-for-Data-Science with MIT License 5 votes vote down vote up
/**
 * Prints a lower-cased copy of {@code sentence}, splits it into
 * whitespace-delimited words, POS-tags the words with the model read from
 * {@code en-pos-maxent.bin} in the working directory, and prints each
 * "word - tag" pair followed by the tagger's top-K sequences.
 *
 * <p>An {@link IOException} while reading the model is reported via
 * {@code printStackTrace()} and otherwise ignored.
 *
 * <p>NOTE(review): {@code sentence} and {@code out} are declared outside this
 * method; {@code out} is presumably a static import of {@code System.out} — confirm.
 */
public void POSExample() {
    try (InputStream input = new FileInputStream(
            new File("en-pos-maxent.bin"))) {

        // To lower case example
        String lowerCaseVersion = sentence.toLowerCase();
        out.println(lowerCaseVersion);

        // Pull out tokens
        List<String> list = new ArrayList<>();
        try (Scanner scanner = new Scanner(sentence)) {
            while (scanner.hasNext()) {
                list.add(scanner.next());
            }
        }
        // Convert list to an array. A zero-length seed array is essential:
        // seeding with new String[1] would yield {null} for an empty sentence
        // and hand the tagger a null token.
        String[] words = list.toArray(new String[0]);

        // Build model
        POSModel posModel = new POSModel(input);
        POSTaggerME posTagger = new POSTaggerME(posModel);

        // Tag words
        String[] posTags = posTagger.tag(words);
        for (int i = 0; i < posTags.length; i++) {
            out.println(words[i] + " - " + posTags[i]);
        }

        // Find top sequences
        Sequence[] sequences = posTagger.topKSequences(words);
        for (Sequence sequence : sequences) {
            out.println(sequence);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example 6
Source File: BasicActions.java    From knowledge-extraction with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the POS model named by {@code Consts.EN_POS_MODEL} from the class
 * path, tags the tokens returned by {@code testTokenizer()} and prints the tags.
 *
 * @return the predicted tags, or an empty array when an {@link IOException}
 *         occurred before any tags were produced
 */
public String[] testTagger(){
	String[] result = {};
	ClassLoader loader = BasicActions.class.getClassLoader();
	try (InputStream modelStream = loader.getResourceAsStream(Consts.EN_POS_MODEL)) {
		POSTaggerME posTagger = new POSTaggerME(new POSModel(modelStream));
		result = posTagger.tag(testTokenizer());
		System.out.println(Arrays.toString(result));
	} catch (IOException e) {
		// Best effort: report the failure and fall through with whatever was tagged so far.
		e.printStackTrace();
	}
	return result;
}
 
Example 7
Source File: POSTaggerUnitTest.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Tokenizes a sample sentence, POS-tags it with the bundled English model
 * and checks that the expected tags are present.
 *
 * @throws Exception if the model resource cannot be loaded
 */
@Test
public void givenPOSModel_whenPOSTagging_thenPOSAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    // try-with-resources closes the model stream even if loading or tagging fails.
    try (InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin")) {
        POSModel posModel = new POSModel(inputStreamPOSTagger);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        String[] tags = posTagger.tag(tokens);
        assertThat(tags).contains("NNP", "VBZ", "DT", "NN", "VBN", "NNP", ".");
    }
}
 
Example 8
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 4 votes vote down vote up
/**
 * Loads the English POS model from the model directory, tags the tokens in
 * {@code sentence} and prints three views of the result: "token/tag" pairs,
 * the top-K tag sequences, and each sequence's outcomes with their
 * probabilities.
 *
 * <p>An {@link IOException} while reading the model is reported via
 * {@code printStackTrace()} and otherwise ignored.
 */
private static void usingOpenNLPPOSModel() {
    System.out.println("OpenNLP POSModel Examples");
    try (InputStream modelStream = new FileInputStream(
            new File(getModelDir(), "en-pos-maxent.bin"))) {
        POSTaggerME posTagger = new POSTaggerME(new POSModel(modelStream));

        // Alternative introduction sentences to experiment with:
        //   sentence = tokenizeSentence("The cow jumped over the moon.");
        //   sentence = tokenizeSentence("Bill used the force to force the manger to tear the bill in two.");
        //   sentence = tokenizeSentence("AFAIK she H8 cth!");
        //   sentence = tokenizeSentence("BTW had a GR8 tym at the party BBIAM.");
        //   sentence = tokenizeSentence("Whether \"Blue\" was correct or not (it’s not) is debatable");
        String[] predictedTags = posTagger.tag(sentence);
        // Fetched right after tagging, as in the original walkthrough; not printed below.
        double[] tagProbabilities = posTagger.probs();

        int position = 0;
        while (position < sentence.length) {
            System.out.print(sentence[position] + "/" + predictedTags[position] + " ");
            position++;
        }
        System.out.println();

        // Sequence here is opennlp.tools.util.Sequence, not opennlp.model.Sequence.
        System.out.println("topSequences");
        Sequence[] bestSequences = posTagger.topKSequences(sentence);
        for (Sequence candidate : bestSequences) {
            System.out.println(candidate);
            // To list the individual outcomes instead:
            //   for (String outcome : candidate.getOutcomes()) {
            //       System.out.print(outcome + " ");
            //       System.out.println();
            //   }
        }
        System.out.println();

        System.out.println("occurrences and probabilities");
        // A DecimalFormat("##.###") could be used instead of printf formatting.
        for (Sequence candidate : bestSequences) {
            List<String> labels = candidate.getOutcomes();
            double[] labelProbabilities = candidate.getProbs();
            for (int k = 0; k < labels.size(); k++) {
                System.out.printf("%s/%5.3f ", labels.get(k), labelProbabilities[k]);
            }
            System.out.println();
        }
        System.out.println();

        // Getting the dictionary tags (disabled):
        //   POSTaggerFactory ptf = model.getFactory();
        //   TagDictionary tagDictionary = ptf.getTagDictionary();
        //   String dictionaryTags[] = tagDictionary.getTags("the");
        //   System.out.println(dictionaryTags.length);
        //   for (String word : dictionaryTags) {
        //       System.out.println(word);
        //   }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Example 9
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 4 votes vote down vote up
/**
 * Loads the English POS and chunker models from the model directory, tags
 * the tokens in {@code sentence}, and prints the "token/tag" pairs, the
 * per-token chunk labels, and the chunk spans with the tokens they cover.
 *
 * <p>Model files are resolved with {@code new File(parent, child)} so the
 * platform's path separator is used instead of a hard-coded backslash.
 *
 * <p>An {@link IOException} while reading either model is reported via
 * {@code printStackTrace()} and otherwise ignored.
 */
private static void usingOpenNLPChunker() {
    try (
            InputStream posModelStream = new FileInputStream(
                    new File(getModelDir(), "en-pos-maxent.bin"));
            InputStream chunkerStream = new FileInputStream(
                    new File(getModelDir(), "en-chunker.bin"))) {
        POSModel model = new POSModel(posModelStream);
        POSTaggerME tagger = new POSTaggerME(model);

        // Used to create sample data for trainer (disabled):
        //   for (String sentence : sentences) {
        //       String sen[] = tokenizeSentence(sentence);
        //       String tags[] = tagger.tag(sen);
        //       for (int i = 0; i < tags.length; i++) {
        //           System.out.print(sen[i] + "/" + tags[i] + " ");
        //       }
        //       System.out.println();
        //   }
        //   System.out.println();

        String[] tags = tagger.tag(sentence);
        for (int i = 0; i < tags.length; i++) {
            System.out.print(sentence[i] + "/" + tags[i] + " ");
        }
        System.out.println();

        // chunker
        System.out.println("------------Chunker -----------");
        ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
        ChunkerME chunkerME = new ChunkerME(chunkerModel);
        String[] result = chunkerME.chunk(sentence, tags);

        for (int i = 0; i < result.length; i++) {
            System.out.println("[" + sentence[i] + "] " + result[i]);
        }

        System.out.println("------------Chunker Spans -----------");
        Span[] spans = chunkerME.chunkAsSpans(sentence, tags);
        for (Span span : spans) {
            System.out.print("Type: " + span.getType() + " - " + " Begin: "
                    + span.getStart() + " End:" + span.getEnd()
                    + " Length: " + span.length() + "  [");
            // The loop treats the span end as exclusive when listing covered tokens.
            for (int j = span.getStart(); j < span.getEnd(); j++) {
                System.out.print(sentence[j] + " ");
            }
            System.out.println("]");
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example 10
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Splits the POS samples extracted from the given CASes into training and
 * test partitions, trains a model on the training partition and compares
 * its predicted tags with the gold tags of the test partition.
 *
 * <p>When either partition holds fewer than two samples, no model is
 * trained and a result flagged as skipped (with an explanatory message) is
 * returned instead.
 *
 * @param aCasses CASes from which the POS samples are extracted
 * @param aDataSplitter decides for each sample whether it is used for training, testing or neither
 * @return the evaluation result collected from the gold/predicted label pairs
 * @throws RecommendationException if training yields no model
 */
@Override
public EvaluationResult evaluate(List<CAS> aCasses, DataSplitter aDataSplitter)
    throws RecommendationException
{
    List<POSSample> allSamples = extractPosSamples(aCasses);
    List<POSSample> trainSamples = new ArrayList<>();
    List<POSSample> testSamples = new ArrayList<>();

    // Route every sample to the partition picked by the splitter; anything else is dropped.
    for (POSSample candidate : allSamples) {
        switch (aDataSplitter.getTargetSet(candidate)) {
        case TRAIN:
            trainSamples.add(candidate);
            break;
        case TEST:
            testSamples.add(candidate);
            break;
        default:
            // Neither train nor test: ignore
            break;
        }
    }

    int trainCount = trainSamples.size();
    int testCount = testSamples.size();
    int totalCount = allSamples.size();

    // Share of the non-test samples that actually ended up in the training partition.
    double nonTestCount = totalCount - testCount;
    double trainRatio = 0.0;
    if (nonTestCount > 0) {
        trainRatio = trainCount / nonTestCount;
    }

    boolean enoughData = trainCount >= 2 && testCount >= 2;
    if (!enoughData) {
        String message = String.format(
                "Not enough evaluation data: training set [%s] items, test set [%s] of total [%s]",
                trainCount, testCount, totalCount);
        LOG.info(message);

        EvaluationResult skipped = new EvaluationResult(trainCount, testCount, trainRatio);
        skipped.setEvaluationSkipped(true);
        skipped.setErrorMsg(message);
        return skipped;
    }

    LOG.info("Training on [{}] items, predicting on [{}] of total [{}]", trainCount,
        testCount, totalCount);

    // Train model
    POSModel trainedModel = train(trainSamples, traits.getParameters());
    if (trainedModel == null) {
        throw new RecommendationException("Model is null, cannot evaluate!");
    }

    POSTaggerME posTagger = new POSTaggerME(trainedModel);

    // Evaluate: pair each predicted tag with the gold tag at the same position
    List<LabelPair> goldVsPredicted = new ArrayList<>();
    for (POSSample testSample : testSamples) {
        String[] predicted = posTagger.tag(testSample.getSentence());
        String[] gold = testSample.getTags();
        for (int pos = 0; pos < predicted.length; pos++) {
            goldVsPredicted.add(new LabelPair(gold[pos], predicted[pos]));
        }
    }

    return goldVsPredicted.stream().collect(EvaluationResult
            .collector(trainCount, testCount, trainRatio, PAD));
}