opennlp.tools.postag.POSTaggerME Java Examples

The following examples show how to use opennlp.tools.postag.POSTaggerME. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OpenNLP.java    From baleen with Apache License 2.0 6 votes vote down vote up
// Loads the four serialized OpenNLP models from the classpath and then builds
// the pipeline components (sentence detector, tokenizer, POS tagger, chunker).
// Any failure is logged and rethrown as a ResourceInitializationException.
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
  try {
    // Each loadModel call pairs a model class with its binary resource on the classpath.
    tokensModel.loadModel(TokenizerModel.class, getClass().getResourceAsStream("en_token.bin"));
    sentencesModel.loadModel(SentenceModel.class, getClass().getResourceAsStream("en_sent.bin"));
    posModel.loadModel(POSModel.class, getClass().getResourceAsStream("en_pos_maxent.bin"));
    chunkModel.loadModel(ChunkerModel.class, getClass().getResourceAsStream("en_chunker.bin"));
  } catch (BaleenException be) {
    // Log first, then surface the failure to the UIMA framework.
    getMonitor().error("Unable to load OpenNLP Language Models", be);
    throw new ResourceInitializationException(be);
  }

  try {
    // Wrap each loaded model in its corresponding maxent component.
    sentenceDetector = new SentenceDetectorME((SentenceModel) sentencesModel.getModel());
    wordTokenizer = new TokenizerME((TokenizerModel) tokensModel.getModel());
    posTagger = new POSTaggerME((POSModel) posModel.getModel());
    phraseChunker = new ChunkerME((ChunkerModel) chunkModel.getModel());
  } catch (Exception e) {
    getMonitor().error("Unable to create OpenNLP taggers", e);
    throw new ResourceInitializationException(e);
  }
}
 
Example #2
Source File: ChunkerUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Tokenizes a sample sentence, POS-tags it, then chunks it and verifies the
 * expected IOB chunk labels are produced.
 */
@Test
public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");

    // FIX: both model streams were leaked; try-with-resources closes them
    // even when model loading throws.
    String[] tags;
    try (InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin")) {
        POSModel posModel = new POSModel(inputStreamPOSTagger);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        tags = posTagger.tag(tokens);
    }

    try (InputStream inputStreamChunker = new FileInputStream("src/main/resources/models/en-chunker.bin")) {
        ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
        ChunkerME chunker = new ChunkerME(chunkerModel);
        String[] chunks = chunker.chunk(tokens, tags);
        assertThat(chunks).contains("B-NP", "B-VP", "B-NP", "I-NP", "I-NP", "I-NP", "B-VP", "I-VP", "B-PP", "B-NP", "I-NP", "I-NP", "O");
    }
}
 
Example #3
Source File: LemmetizerUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * Tokenizes and POS-tags a sample sentence, then looks up lemmas in a
 * dictionary lemmatizer and verifies the expected lemmas ("O" marks tokens
 * with no dictionary entry).
 */
@Test
public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    // FIX: both classpath streams were leaked; try-with-resources closes them.
    String[] tags;
    try (InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin")) {
        POSModel posModel = new POSModel(inputStreamPOSTagger);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        tags = posTagger.tag(tokens);
    }

    try (InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict")) {
        DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
        String[] lemmas = lemmatizer.lemmatize(tokens, tags);
        assertThat(lemmas).contains("O", "have", "a", "sister", "name", "O", "O");
    }
}
 
Example #4
Source File: PosTagger.java    From dexter with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the segmenters and loads the POS model from the classpath.
 * On I/O failure the error is logged and {@code posTagger} is left unset.
 */
private PosTagger() {

		ss = new SentenceSegmenter();
		ts = new TokenSegmenter();
		// FIX: the original closed the stream manually and then again in a
		// finally block whose catch silently swallowed close failures;
		// try-with-resources closes exactly once on every path.
		try (InputStream modelIn = getClass().getResourceAsStream("/nlp/en-pos-maxent.bin")) {
			final POSModel posModel = new POSModel(modelIn);
			posTagger = new POSTaggerME(posModel);
		} catch (final IOException ioe) {
			// Preserves the original best-effort behavior: log and continue.
			// NOTE(review): callers will NPE on posTagger if loading failed.
			ioe.printStackTrace();
		}
	}
 
Example #5
Source File: AbstractTaggerTrainer.java    From ixa-pipe-pos with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an ngram dictionary from the training data, unless the cutoff is the
 * sentinel {@code Flags.DEFAULT_DICT_CUTOFF}, in which case no dictionary is
 * built and {@code null} is returned.
 *
 * @param aDictSamples
 *          the training data
 * @param aNgramCutoff
 *          the cutoff
 * @return ngram dictionary, or {@code null} when disabled
 */
protected final Dictionary createNgramDictionary(
    final ObjectStream<POSSample> aDictSamples, final int aNgramCutoff) {
  // Guard clause: dictionary building is disabled at the default cutoff.
  if (aNgramCutoff == Flags.DEFAULT_DICT_CUTOFF) {
    return null;
  }
  System.err.print("Building ngram dictionary ... ");
  final Dictionary ngramDict;
  try {
    ngramDict = POSTaggerME.buildNGramDictionary(aDictSamples, aNgramCutoff);
    // Rewind the sample stream so it can be consumed again by the trainer.
    this.dictSamples.reset();
  } catch (final IOException e) {
    throw new TerminateToolException(-1,
        "IO error while building NGram Dictionary: " + e.getMessage(), e);
  }
  System.err.println("done");
  return ngramDict;
}
 
Example #6
Source File: AbstractTaggerTrainer.java    From ixa-pipe-pos with Apache License 2.0 6 votes vote down vote up
/**
 * Automatically creates (or extends) a tag dictionary from the training data.
 * A no-op when the cutoff is the sentinel {@code Flags.DEFAULT_DICT_CUTOFF}.
 *
 * @param aDictSamples
 *          the dictSamples created from training data
 * @param aDictCutOff
 *          the cutoff to create the dictionary
 */
protected final void createAutomaticDictionary(
    final ObjectStream<POSSample> aDictSamples, final int aDictCutOff) {
  // Guard clause: dictionary creation is disabled at the default cutoff.
  if (aDictCutOff == Flags.DEFAULT_DICT_CUTOFF) {
    return;
  }
  try {
    TagDictionary dict = getPosTaggerFactory().getTagDictionary();
    if (dict == null) {
      // No dictionary yet: start from an empty one on the factory.
      dict = getPosTaggerFactory().createEmptyTagDictionary();
      getPosTaggerFactory().setTagDictionary(dict);
    }
    if (!(dict instanceof MutableTagDictionary)) {
      throw new IllegalArgumentException("Can't extend a POSDictionary"
          + " that does not implement MutableTagDictionary.");
    }
    POSTaggerME.populatePOSDictionary(aDictSamples,
        (MutableTagDictionary) dict, aDictCutOff);
    // Rewind the sample stream so it can be consumed again by the trainer.
    this.dictSamples.reset();
  } catch (final IOException e) {
    throw new TerminateToolException(-1,
        "IO error while creating/extending POS Dictionary: "
            + e.getMessage(), e);
  }
}
 
Example #7
Source File: AbstractTaggerTrainer.java    From ixa-pipe-pos with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a POS model from {@code this.trainSamples} with the given parameters,
 * evaluates it against {@code this.testSamples}, prints the word accuracy and
 * returns the trained model.
 *
 * @param params the OpenNLP training parameters
 * @return the trained model
 */
public final POSModel train(final TrainingParameters params) {
  // features
  if (getPosTaggerFactory() == null) {
    throw new IllegalStateException(
        "Classes derived from AbstractTrainer must "
            + " create a POSTaggerFactory features!");
  }
  // training model
  POSModel trainedModel = null;
  POSEvaluator posEvaluator = null;
  try {
    trainedModel = POSTaggerME.train(this.lang, this.trainSamples, params,
        getPosTaggerFactory());
    final POSTaggerME posTagger = new POSTaggerME(trainedModel);
    posEvaluator = new POSEvaluator(posTagger);
    posEvaluator.evaluate(this.testSamples);
  } catch (final IOException e) {
    // On I/O failure the process exits, so the null posEvaluator below is
    // never dereferenced on this path.
    System.err.println("IO error while loading training and test sets!");
    e.printStackTrace();
    System.exit(1);
  }
  System.out.println("Final result: " + posEvaluator.getWordAccuracy());
  return trainedModel;
}
 
Example #8
Source File: JM_Scorer.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Scores how well the syntactic structure of a passage matches a question by
 * comparing their constituency parses, provided the passage contains the
 * candidate answer.
 *
 * @param ca      the candidate answer text
 * @param q       the question text
 * @param passage the passage text
 * @param verbose unused verbosity flag (kept for interface compatibility)
 * @return the accumulated structure-match score (0 when the passage does not
 *         contain the candidate answer)
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException{
	// FIX: removed the unused POSTaggerME/Tokenizer locals, which only opened
	// model file streams that were never used and never closed.
	Parser parser = ParserFactory.create(new ParserModel(new FileInputStream(new File("en-parser.bin"))));
	double score = 0;

	Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
	// BUG FIX: the original parsed the question a second time here instead of
	// the passage, so the structure match always compared q against itself.
	Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);

	if (passage.contains(ca)) {
		for (int i = 0; i < questionParse.length; i++) {
			score += matchChildren(questionParse[i], passageParse[i]);
		}
	}

	return score;
}
 
Example #9
Source File: AnswerTypeEventStream.java    From wiseowl with MIT License 6 votes vote down vote up
/**
 * CLI entry point: replays an answer-type event file through the chunk parser
 * pipeline and prints each generated event.
 *
 * Reads model locations from the "models.dir" and "wordnet.dir" system
 * properties, with book-layout defaults.
 *
 * @param args args[0] is the event file path
 * @throws IOException if a model or the event file cannot be read
 */
public static void main(String[] args) throws IOException {
    if (args.length == 0) {
        System.err.println("Usage: AnswerTypeEventStream eventfile");
        System.exit(1);
    }
    int ai = 0;
    String eventFile = args[ai++];
    String modelsDirProp = System.getProperty("models.dir", "book/src/main" + File.separator + "opennlp-models" +
            File.separator + "english");
    File modelsDir = new File(modelsDirProp);
    File wordnetDir = new File(System.getProperty("wordnet.dir", "book/src/main" + File.separator + "WordNet-3.0" + File.separator + "dict"));
    // Build the chunker and tagger that back the lightweight ChunkParser.
    // NOTE(review): the model streams are never closed; acceptable for a
    // short-lived CLI but worth fixing.
    InputStream chunkerStream = new FileInputStream(
            new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(
            new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(wordnetDir);
    // Stream every event from the file and echo it to stdout.
    EventStream es = new AnswerTypeEventStream(eventFile, actg, parser);
    while (es.hasNext()) {
        System.out.println(es.next().toString());
    }
}
 
Example #10
Source File: Chapter1.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Demonstrates POS tagging: tags a fixed sentence, round-trips the result
 * through a POSSample, and prints the token/tag pairs two ways.
 */
private static void detectingPartsOfSpeechExample() {
    String sentence = "POS processing is useful for enhancing the "
            + "quality of data sent to other elements of a pipeline.";

    POSModel model = new POSModelLoader()
            .load(new File("C:/Current Books/NLP and Java/Models/", "en-pos-maxent.bin"));
    POSTaggerME tagger = new POSTaggerME(model);

    String[] tokens = WhitespaceTokenizer.INSTANCE
            .tokenize(sentence);
    String[] tags = tagger.tag(tokens);

    POSSample sample = new POSSample(tokens, tags);
    String[] posTokens = sample.getSentence();
    String[] posTags = sample.getTags();
    for (int i = 0; i < posTokens.length; i++) {
        // BUG FIX: the original used print() with no separator, so every
        // "token - tag" pair ran together on one unreadable line.
        System.out.println(posTokens[i] + " - " + posTags[i]);
    }
    System.out.println();

    // Second form: inline "token[tag]" pairs separated by spaces.
    for (int i = 0; i < tokens.length; i++) {
        System.out.print(tokens[i] + "[" + tags[i] + "] ");
    }
}
 
Example #11
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a POS model from the given CASes and stores it in the recommender
 * context under {@code KEY_MODEL}. Skips training (leaving the context
 * unchanged) when fewer than two samples are available.
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
    throws RecommendationException
{
    List<POSSample> posSamples = extractPosSamples(aCasses);
    
    if (posSamples.size() < 2) {
        LOG.info("Not enough training data: [{}] items", posSamples.size());
        return;
    }

    // The beam size controls how many results are returned at most. But even if the user
    // requests only few results, we always use at least the default beam size recommended by
    // OpenNLP
    int beamSize = Math.max(maxRecommendations, POSTaggerME.DEFAULT_BEAM_SIZE);

    TrainingParameters params = traits.getParameters();
    params.put(BeamSearch.BEAM_SIZE_PARAMETER, Integer.toString(beamSize));
    POSModel model = train(posSamples, params);

    // Publish the trained model for the predict step.
    aContext.put(KEY_MODEL, model);
}
 
Example #12
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains an OpenNLP POS model from the given samples.
 *
 * @param aPosSamples the training samples
 * @param aParameters the OpenNLP training parameters
 * @return the trained model, or {@code null} when there are no samples
 * @throws RecommendationException if training fails with an I/O error
 */
@Nullable
private POSModel train(List<POSSample> aPosSamples, TrainingParameters aParameters)
    throws RecommendationException
{
    if (aPosSamples.isEmpty()) {
        return null;
    }

    // The sample stream is AutoCloseable, so close it on every path.
    try (POSSampleStream sampleStream = new POSSampleStream(aPosSamples)) {
        return POSTaggerME.train("unknown", sampleStream, aParameters, new POSTaggerFactory());
    }
    catch (IOException e) {
        throw new RecommendationException("Error training model", e);
    }
}
 
Example #13
Source File: OpenNLPAnnotator.java    From Stargraph with MIT License 6 votes vote down vote up
/**
 * Tokenizes the sentence, POS-tags the tokens, and pairs each token with its
 * part of speech for the given language.
 */
@Override
public List<Word> doRun(Language language, String sentence) {
    String[] tokens = new TokenizerME(getTokenizerModel(language)).tokenize(sentence);
    String[] tags = new POSTaggerME(getPOSModel(language)).tag(tokens);

    PartOfSpeechSet posSet = PartOfSpeechSet.getPOSSet(language);

    // Zip tokens with their tags, resolving each tag against the POS set.
    List<Word> result = new ArrayList<>(tokens.length);
    for (int idx = 0; idx < tokens.length; idx++) {
        result.add(new Word(posSet.valueOf(tags[idx]), tokens[idx]));
    }

    return result;
}
 
Example #14
Source File: AnswerTypeClassifier.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Train the answer model
 * <p>
 * Hint:
 * <pre>
 *  mvn exec:java -Dexec.mainClass=com.tamingtext.qa.AnswerTypeClassifier \
 *    -Dexec.args="dist/data/questions-train.txt en-answer.bin" \
 *    -Dmodel.dir=../../opennlp-models \
 *    -Dwordnet.dir=../../Wordnet-3.0/dict
 *  </pre>
 *
 * @param args args[0] = training file, args[1] = output model file
 * @throws IOException if a model or the training file cannot be read
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.err.println("Usage: AnswerTypeClassifier <trainFile> <modelFile>");
        System.exit(1);
    }

    String trainFile = args[0];
    File outFile = new File(args[1]);
    // Model/WordNet locations come from system properties (see the hint above).
    String modelsDirProp = System.getProperty("model.dir");
    File modelsDir = new File(modelsDirProp);
    String wordnetDir = System.getProperty("wordnet.dir");

    // Build the chunker+tagger pipeline that backs the lightweight ChunkParser.
    // NOTE(review): the model streams are never closed; acceptable for a
    // short-lived CLI but worth fixing.
    InputStream chunkerStream = new FileInputStream(
            new File(modelsDir, "en-chunker.bin"));
    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
    ChunkerME chunker = new ChunkerME(chunkerModel);
    InputStream posStream = new FileInputStream(
            new File(modelsDir, "en-pos-maxent.bin"));
    POSModel posModel = new POSModel(posStream);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(new File(wordnetDir));
    //<start id="atc.train"/>
    AnswerTypeEventStream es = new AnswerTypeEventStream(trainFile,
            actg, parser);
    GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3));//<co id="atc.train.do"/>
    GISModelWriter writer = new SuffixSensitiveGISModelWriter(model, outFile);
    writer.persist();
    //new DoccatModel("en", model).serialize(new FileOutputStream(outFile));
/*
<calloutlist>
    <callout arearefs="atc.train.do"><para>Using the event stream, which feeds us training examples, do the actual training using OpenNLP's Maxent classifier.</para></callout>
</calloutlist>
*/
    //<end id="atc.train"/>
}
 
Example #15
Source File: POSTaggerUnitTest.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Tokenizes a sample sentence, POS-tags it, and verifies the expected
 * Penn Treebank tags are produced.
 */
@Test
public void givenPOSModel_whenPOSTagging_thenPOSAreDetected() throws Exception {

    SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
    String[] tokens = tokenizer.tokenize("John has a sister named Penny.");

    // FIX: the classpath stream was leaked; try-with-resources closes it.
    try (InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin")) {
        POSModel posModel = new POSModel(inputStreamPOSTagger);
        POSTaggerME posTagger = new POSTaggerME(posModel);
        String[] tags = posTagger.tag(tokens);
        assertThat(tags).contains("NNP", "VBZ", "DT", "NN", "VBN", "NNP", ".");
    }
}
 
Example #16
Source File: StatisticalTagger.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Construct a morphotagger with {@code MorphoFactory}.
 *
 * @param props
 *          the properties object; "language" and "model" select the model,
 *          "useModelCache" (default "true") controls model caching
 * @param aMorphoFactory
 *          the morpho factory
 */
public StatisticalTagger(final Properties props, final MorphoFactory aMorphoFactory) {
  // Pull the configuration out of the properties, then build the tagger.
  final String language = props.getProperty("language");
  final String modelName = props.getProperty("model");
  final Boolean cacheModel = Boolean.valueOf(props.getProperty("useModelCache", "true"));
  this.posTagger = new POSTaggerME(loadModel(language, modelName, cacheModel));
  this.morphoFactory = aMorphoFactory;
}
 
Example #17
Source File: PoStagger.java    From Canova with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the current instance with the given context.
 *
 * Note: Do all initialization in this method, do not use the constructor.
 *
 * Fetches the POS model from the UIMA resource manager, reads the optional
 * beam-size parameter (falling back to OpenNLP's default), and constructs
 * the tagger.
 */
@Override
public void initialize(UimaContext context)
    throws ResourceInitializationException {

  super.initialize(context);

  this.context = context;

  this.logger = context.getLogger();

  if (this.logger.isLoggable(Level.INFO)) {
    this.logger.log(Level.INFO, "Initializing the OpenNLP "
        + "Part of Speech annotator.");
  }

  POSModel model;

  try {
    // The model is shared via the UIMA resource manager rather than loaded
    // from a file here.
    POSModelResource modelResource = (POSModelResource) context
        .getResourceObject(UimaUtil.MODEL_PARAMETER);

    model = modelResource.getModel();
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }

  Integer beamSize = AnnotatorUtil.getOptionalIntegerParameter(context,
      UimaUtil.BEAM_SIZE_PARAMETER);

  if (beamSize == null)
    beamSize = POSTaggerME.DEFAULT_BEAM_SIZE;

  // Third argument is the cache size (0 = no context cache).
  this.posTagger = new POSTaggerME(model, beamSize, 0);
}
 
Example #18
Source File: OpenNlpModule.java    From SciGraph with Apache License 2.0 5 votes vote down vote up
/**
 * Provides a POS tagger backed by the bundled English maxent model.
 *
 * @return a freshly constructed tagger
 * @throws IOException if the model resource cannot be read
 */
@CheckedProvides(PosTaggerProvider.class)
POSTaggerME getPosTagger() throws IOException {
  // The stream is closed by try-with-resources once the model is read.
  try (InputStream modelStream = getClass().getResourceAsStream("/opennlp/en-pos-maxent.bin")) {
    return new POSTaggerME(new POSModel(modelStream));
  }
}
 
Example #19
Source File: BasicActions.java    From knowledge-extraction with Apache License 2.0 5 votes vote down vote up
/**
 * Tags the tokens produced by {@code testTokenizer()} and prints them.
 *
 * @return the POS tags, or an empty array if the model cannot be read
 */
public String[] testTagger(){
	String[] tags = {};
	// try-with-resources closes the classpath model stream.
	try (InputStream modelIn = BasicActions.class.getClassLoader()
			.getResourceAsStream(Consts.EN_POS_MODEL)) {
		POSTaggerME tagger = new POSTaggerME(new POSModel(modelIn));
		tags = tagger.tag(testTokenizer());
		System.out.println(Arrays.toString(tags));
	} catch (IOException e) {
		// Best effort: log and fall through to the empty array.
		e.printStackTrace();
	}
	return tags;
}
 
Example #20
Source File: WiseOwlQParserPlugin.java    From wiseowl with MIT License 5 votes vote down vote up
// Initializes the question-parsing pipeline from Solr config parameters:
// loads the chunker, POS tagger, answer-type maxent model and the
// WordNet-backed context generator. Model locations come from the
// "modelDirectory"/"wordnetDirectory" params, falling back to the
// "model.dir"/"wordnet.dir" system properties. Silently does nothing when
// no model directory is configured.
@SuppressWarnings("rawtypes")
public void init(NamedList initArgs) {
	
    SolrParams params = SolrParams.toSolrParams(initArgs);
    String modelDirectory = params.get("modelDirectory",
            System.getProperty("model.dir"));//<co id="qqpp.model"/>
    String wordnetDirectory = params.get("wordnetDirectory",
            System.getProperty("wordnet.dir"));//<co id="qqpp.wordnet"/>
    if (modelDirectory != null) {
      File modelsDir = new File(modelDirectory);
      try {
        // NOTE(review): the model streams opened below are never closed.
        InputStream chunkerStream = new FileInputStream(
            new File(modelsDir,"en-chunker.bin"));
        ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
        chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/>
        InputStream posStream = new FileInputStream(
            new File(modelsDir,"en-pos-maxent.bin"));
        POSModel posModel = new POSModel(posStream);
        tagger =  new POSTaggerME(posModel); //<co id="qqpp.tagger"/>
       // model = new DoccatModel(new FileInputStream( //<co id="qqpp.theModel"/>
     //       new File(modelDirectory,"en-answer.bin"))).getMaxentModel();
        model = new SuffixSensitiveGISModelReader(new File(modelDirectory+"/qa/ans.bin")).getModel();
        //GISModel m = new SuffixSensitiveGISModelReader(new File(modelFileName)).getModel(); 
        // Reusable scratch buffer sized to the classifier's outcome count.
        probs = new double[model.getNumOutcomes()];
        atcg = new AnswerTypeContextGenerator(
                new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/>
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
 
Example #21
Source File: FocusNoun.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Demo entry point: parses a hard-coded question and prints its focus words
 * (the "hw="/"mw=" entries produced by AnswerTypeContextGenerator).
 *
 * Requires the "wordnet.dir" and "model.dir" system properties to point at
 * the WordNet dictionary and the OpenNLP model directory.
 *
 * @param args ignored
 * @throws IOException if a model file cannot be read
 */
public static void main(String args[]) throws IOException
{
	String wordnetDir = System.getProperty("wordnet.dir");
	String question = "Who is Abraham Lincoln?";
	AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));
	String modelsDirProp = System.getProperty("model.dir");
	File modelsDir = new File(modelsDirProp);
	// FIX: removed the unused local "q" and dead commented-out path overrides;
	// try-with-resources closes both model streams (the original leaked them).
	Parser parser;
	try (InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
			InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"))) {
		ChunkerME chunker = new ChunkerME(new ChunkerModel(chunkerStream));
		POSTaggerME tagger = new POSTaggerME(new POSModel(posStream));
		parser = new ChunkParser(chunker, tagger);
	}

	Parse query = ParserTool.parseLine(question, parser, 1)[0];
	String[] context = atcg.getContext(query);
	for (int i = 0; i < context.length; i++)
	{
		// "hw=" (head word) and "mw=" (main word) entries carry the focus noun.
		if (context[i].startsWith("hw=") || context[i].startsWith("mw="))
		{
			System.out.println(context[i].substring(3));
		}
	}
}
 
Example #22
Source File: FocusNoun.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Returns up to two focus words (the "hw=" head-word and "mw=" main-word
 * entries produced by AnswerTypeContextGenerator) for the given question.
 *
 * @param question the question text to parse
 * @return a 2-element array; unused slots remain {@code null}
 * @throws IOException if a model file cannot be read
 */
public String[] getFocusNoun(String question) throws IOException
{
	// NOTE(review): these hard-coded paths deliberately shadow the system
	// properties (kept from the original) — confirm this is still intended.
	String wordnetDir = System.getProperty("wordnet.dir");
	wordnetDir="WordNet-3.0/dict/";
	AnswerTypeContextGenerator atcg=new AnswerTypeContextGenerator(new File(wordnetDir));
	String modelsDirProp = System.getProperty("model.dir");
	modelsDirProp="opennlp-models/";
	File modelsDir = new File(modelsDirProp);
	// FIX: removed the unused local "q"; try-with-resources closes both model
	// streams (the original leaked them).
	Parser parser;
	try (InputStream chunkerStream = new FileInputStream(new File(modelsDir, "en-chunker.bin"));
			InputStream posStream = new FileInputStream(new File(modelsDir, "en-pos-maxent.bin"))) {
		ChunkerME chunker = new ChunkerME(new ChunkerModel(chunkerStream));
		POSTaggerME tagger = new POSTaggerME(new POSModel(posStream));
		parser = new ChunkParser(chunker, tagger);
	}

	Parse query = ParserTool.parseLine(question, parser, 1)[0];
	String[] context = atcg.getContext(query);
	String[] focus = new String[2];
	int p = 0;
	// BUG FIX: bound the loop by the array size too — the original threw
	// ArrayIndexOutOfBoundsException if more than two hw=/mw= entries existed.
	for (int i = 0; i < context.length && p < focus.length; i++)
	{
		if (context[i].startsWith("hw=") || context[i].startsWith("mw="))
		{
			focus[p++] = context[i].substring(3);
		}
	}
	return focus;
}
 
Example #23
Source File: OpenNlpTartarus.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Loads an OpenNLP POS tagger from a model resource on the given class loader.
 *
 * @param cl the class loader used to locate the model
 * @param filename the model resource name
 * @return the constructed tagger
 * @throws IOException if the model cannot be read
 */
private POSTaggerME loadPoSME(ClassLoader cl, String filename) throws IOException {
	// FIX: the original only closed the stream on the success path (and would
	// leak it if load/createTempFile threw); try-with-resources closes it on
	// every path, and handles a null resource the same way the null-check did.
	try (InputStream resource = loadModelInput(cl, filename)) {
		POSModel model = new POSModelLoader().load(createTempFile(resource, "bin"));
		return new POSTaggerME(model);
	}
}
 
Example #24
Source File: NLPExamples.java    From Java-for-Data-Science with MIT License 5 votes vote down vote up
/**
 * Demonstrates POS tagging with OpenNLP: prints a lower-cased copy of the
 * sample sentence, whitespace-splits it with a Scanner, tags each word, and
 * prints the top tag sequences.
 */
public void POSExample() {
    try (InputStream modelStream = new FileInputStream(
            new File("en-pos-maxent.bin"));) {

        // To lower case example
        out.println(sentence.toLowerCase());

        // Split the sentence on whitespace using a Scanner.
        List<String> tokenList = new ArrayList<>();
        Scanner scanner = new Scanner(sentence);
        while (scanner.hasNext()) {
            tokenList.add(scanner.next());
        }
        String[] words = tokenList.toArray(new String[0]);

        // Build the model and tagger from the local model file.
        POSTaggerME posTagger = new POSTaggerME(new POSModel(modelStream));

        // Tag every word and print word/tag pairs.
        String[] posTags = posTagger.tag(words);
        for (int i = 0; i < posTags.length; i++) {
            out.println(words[i] + " - " + posTags[i]);
        }

        // Print the highest-scoring tag sequences.
        for (Sequence sequence : posTagger.topKSequences(words)) {
            out.println(sequence);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #25
Source File: NLPPOSTaggerOp.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** Creates a tagger op wrapping an OpenNLP maxent POS tagger built from the given model. */
public NLPPOSTaggerOp(POSModel model) throws IOException {
  tagger = new POSTaggerME(model);
}
 
Example #26
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 4 votes vote down vote up
// Demonstrates the OpenNLP chunker: POS-tags the class-level "sentence"
// token array, prints the tags, then chunks the tagged tokens and prints the
// chunk labels and chunk spans.
private static void usingOpenNLPChunker() {
        try (
                InputStream posModelStream = new FileInputStream(
                        getModelDir() + "\\en-pos-maxent.bin");
                InputStream chunkerStream = new FileInputStream(
                        getModelDir() + "\\en-chunker.bin");) {
                    POSModel model = new POSModel(posModelStream);
                    POSTaggerME tagger = new POSTaggerME(model);
                    
                    // Used to create sample data for trainer
//                    for (String sentence : sentences) {
//                        String sen[] = tokenizeSentence(sentence);
//                        String tags[] = tagger.tag(sen);
//                        for (int i = 0; i < tags.length; i++) {
////                    for (String token : sentence) {
//                            System.out.print(sen[i] + "/" + tags[i] + " ");
//                        }
//                        System.out.println();
//                    }
//                    System.out.println();

                    // Tag the tokens and print "token/tag" pairs.
                    String tags[] = tagger.tag(sentence);
                    for (int i = 0; i < tags.length; i++) {
//                    for (String token : sentence) {
                        System.out.print(sentence[i] + "/" + tags[i] + " ");
                    }
                    System.out.println();

                    // chunker: label each token with its IOB chunk tag.
                    System.out.println("------------Chunker -----------");
                    ChunkerModel chunkerModel = new ChunkerModel(chunkerStream);
                    ChunkerME chunkerME = new ChunkerME(chunkerModel);
                    String result[] = chunkerME.chunk(sentence, tags);

                    for (int i = 0; i < result.length; i++) {
                        System.out.println("[" + sentence[i] + "] " + result[i]);
                    }

                    // Same chunking expressed as typed token spans.
                    System.out.println("------------Chunker Spans -----------");
                    Span[] spans = chunkerME.chunkAsSpans(sentence, tags);
                    for (Span span : spans) {
                        System.out.print("Type: " + span.getType() + " - " + " Begin: "
                                + span.getStart() + " End:" + span.getEnd()
                                + " Length: " + span.length() + "  [");
                        for (int j = span.getStart(); j < span.getEnd(); j++) {
                            System.out.print(sentence[j] + " ");
                        }
                        System.out.println("]");
                    }
                } catch (IOException ex) {
                    ex.printStackTrace();
                }

    }
 
Example #27
Source File: ChunkParser.java    From wiseowl with MIT License 4 votes vote down vote up
/** Creates a parser that combines the given chunker and POS tagger. */
public ChunkParser(ChunkerME chunker, POSTaggerME tagger) {
  this.chunker = chunker;
  this.tagger = tagger;
}
 
Example #28
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Predicts POS annotations for every sentence in the CAS using the model
 * previously stored under KEY_MODEL. For each sentence (up to the configured
 * prediction limit) the top-K tag sequences are decoded and each non-PAD
 * label is written back as a prediction annotation with its confidence.
 */
@Override
    public void predict(RecommenderContext aContext, CAS aCas)
        throws RecommendationException
    {
        POSModel model = aContext.get(KEY_MODEL).orElseThrow(() -> 
                new RecommendationException("Key [" + KEY_MODEL + "] not found in context"));
        
        POSTaggerME tagger = new POSTaggerME(model);

        Type sentenceType = getType(aCas, Sentence.class);
        Type predictedType = getPredictedType(aCas);
        Type tokenType = getType(aCas, Token.class);

        Feature scoreFeature = getScoreFeature(aCas);
        Feature predictedFeature = getPredictedFeature(aCas);
        Feature isPredictionFeature = getIsPredictionFeature(aCas);

        // Counts sentences processed, so the limit caps work per document.
        int predictionCount = 0;
        for (AnnotationFS sentence : select(aCas, sentenceType)) {
            if (predictionCount >= traits.getPredictionLimit()) {
                break;
            }
            predictionCount++;
            
            // Extract the sentence's covered token texts for the tagger.
            List<AnnotationFS> tokenAnnotations = selectCovered(tokenType, sentence);
            String[] tokens = tokenAnnotations.stream()
                .map(AnnotationFS::getCoveredText)
                .toArray(String[]::new);

            Sequence[] bestSequences = tagger.topKSequences(tokens);

//            LOG.debug("Total number of sequences predicted: {}", bestSequences.length);

            // Emit at most maxRecommendations of the best-scoring sequences.
            for (int s = 0; s < Math.min(bestSequences.length, maxRecommendations); s++) {
                Sequence sequence = bestSequences[s];
                List<String> outcomes = sequence.getOutcomes();
                double[] probabilities = sequence.getProbs();

//                LOG.debug("Sequence {} score {}", s, sequence.getScore());
//                LOG.debug("Outcomes: {}", outcomes);
//                LOG.debug("Probabilities: {}", asList(probabilities));

                for (int i = 0; i < outcomes.size(); i++) {
                    String label = outcomes.get(i);

                    // Do not return PADded tokens
                    if (PAD.equals(label)) {
                        continue;
                    }

                    // Outcome i aligns with token i of this sentence.
                    AnnotationFS token = tokenAnnotations.get(i);
                    int begin = token.getBegin();
                    int end = token.getEnd();
                    double confidence = probabilities[i];

                    // Create the prediction
                    AnnotationFS annotation = aCas.createAnnotation(predictedType, begin, end);
                    annotation.setStringValue(predictedFeature, label);
                    annotation.setDoubleValue(scoreFeature, confidence);
                    annotation.setBooleanValue(isPredictionFeature, true);
                    aCas.addFsToIndexes(annotation);
                }
            }
        }
    }
 
Example #29
Source File: OpenNlpPosRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Evaluates the recommender: splits the extracted samples into training and
 * test sets, trains a model on the training set, tags the test set, and
 * collects gold/predicted label pairs into an EvaluationResult. Returns a
 * skipped result when either split has fewer than two samples.
 */
@Override
public EvaluationResult evaluate(List<CAS> aCasses, DataSplitter aDataSplitter)
    throws RecommendationException
{        
    List<POSSample> data = extractPosSamples(aCasses);
    List<POSSample> trainingSet = new ArrayList<>();
    List<POSSample> testSet = new ArrayList<>();

    // Route each sample to its split; IGNORE and friends fall through.
    for (POSSample posSample : data) {
        switch (aDataSplitter.getTargetSet(posSample)) {
        case TRAIN:
            trainingSet.add(posSample);
            break;
        case TEST:
            testSet.add(posSample);
            break;
        default:
            // Do nothing
            break;
        }
    }

    int testSetSize = testSet.size();
    int trainingSetSize = trainingSet.size();
    // Ratio of actually-used training samples to all non-test samples.
    double overallTrainingSize = data.size() - testSetSize;
    double trainRatio = (overallTrainingSize > 0) ? trainingSetSize / overallTrainingSize : 0.0;
    
    if (trainingSetSize < 2 || testSetSize < 2) {
        String info = String.format(
                "Not enough evaluation data: training set [%s] items, test set [%s] of total [%s]",
                trainingSetSize, testSetSize, data.size());
        LOG.info(info);

        // Report a skipped evaluation rather than failing outright.
        EvaluationResult result = new EvaluationResult(trainingSetSize,
                testSetSize, trainRatio);
        result.setEvaluationSkipped(true);
        result.setErrorMsg(info);
        return result;
    }

    LOG.info("Training on [{}] items, predicting on [{}] of total [{}]", trainingSet.size(),
        testSet.size(), data.size());

    // Train model
    POSModel model = train(trainingSet, traits.getParameters());
    if (model == null) {
        throw new RecommendationException("Model is null, cannot evaluate!");
    }

    POSTaggerME tagger = new POSTaggerME(model);

    // Evaluate: pair each gold tag with the model's prediction, token by token.
    List<LabelPair> labelPairs = new ArrayList<>();
    for (POSSample sample : testSet) {
        String[] predictedTags = tagger.tag(sample.getSentence());
        String[] goldTags = sample.getTags();
        for (int i = 0; i < predictedTags.length; i++) {
            labelPairs.add(new LabelPair(goldTags[i], predictedTags[i]));
        }
    }

    return labelPairs.stream().collect(EvaluationResult
            .collector(trainingSetSize, testSetSize, trainRatio, PAD));
}
 
Example #30
Source File: POSTag.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Pig UDF: POS-tags a bag of single-word tuples and returns a bag of
 * (word, tag, probability) 3-tuples.
 *
 * @param input a tuple whose single field is a DataBag of word tuples
 * @return the output bag of (word, tag, prob) tuples
 * @throws IOException if the input arity is wrong or the model cannot be read
 */
public DataBag exec(Tuple input) throws IOException
{
    if(input.size() != 1) {
        // FIX: the original threw a bare IOException with no message.
        throw new IOException("Expected exactly one input field, got " + input.size());
    }

    DataBag inputBag = (DataBag)input.get(0);
    DataBag outBag = bf.newDefaultBag();

    // Lazily load the model once per UDF instance; try-with-resources closes
    // the stream after loading (the original leaked it).
    if(this.tagger == null) {
        String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
        try (InputStream buffer = new BufferedInputStream(new FileInputStream(loadFile))) {
            POSModel model = new POSModel(buffer);
            this.tagger = new POSTaggerME(model);
        }
    }

    // Copy the words out of the bag so the tagger sees the whole sentence.
    int bagLength = (int)inputBag.size();
    String[] words = new String[bagLength];

    Iterator<Tuple> itr = inputBag.iterator();
    int i = 0;
    while(itr.hasNext()) {
        words[i] = (String)itr.next().get(0);
        i++;
    }

    // Compute tags and their probabilities
    String tags[] = this.tagger.tag(words);
    double probs[] = this.tagger.probs();

    // Build output bag of 3-tuples
    for(int j = 0; j < tags.length; j++) {
        Tuple newTuple = tf.newTuple(3);
        newTuple.set(0, words[j]);
        newTuple.set(1, tags[j]);
        newTuple.set(2, probs[j]);
        outBag.add(newTuple);
    }

    return outBag;
}