opennlp.tools.parser.Parser Java Examples

The following examples show how to use opennlp.tools.parser.Parser. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AnswerTypeEventStream.java    From wiseowl with MIT License 6 votes vote down vote up
/**
 * Prints every event produced by an {@code AnswerTypeEventStream} for the given event file.
 * <p>
 * The OpenNLP model directory is read from the {@code models.dir} system property and the
 * WordNet dictionary directory from {@code wordnet.dir}; both fall back to paths under
 * {@code book/src/main} when unset.
 *
 * @param args {@code args[0]} is the event file to read
 * @throws IOException if a model file or the event file cannot be read
 */
public static void main(String[] args) throws IOException {
    if (args.length == 0) {
        System.err.println("Usage: AnswerTypeEventStream eventfile");
        System.exit(1);
    }
    String eventFile = args[0];
    String modelsDirProp = System.getProperty("models.dir", "book/src/main" + File.separator + "opennlp-models" +
            File.separator + "english");
    File modelsDir = new File(modelsDirProp);
    File wordnetDir = new File(System.getProperty("wordnet.dir", "book/src/main" + File.separator + "WordNet-3.0" + File.separator + "dict"));

    // The model constructors read the stream during construction, so each stream is
    // closed as soon as its model has been built instead of being leaked.
    ChunkerModel chunkerModel;
    try (InputStream chunkerStream = new FileInputStream(
            new File(modelsDir, "en-chunker.bin"))) {
        chunkerModel = new ChunkerModel(chunkerStream);
    }
    POSModel posModel;
    try (InputStream posStream = new FileInputStream(
            new File(modelsDir, "en-pos-maxent.bin"))) {
        posModel = new POSModel(posStream);
    }

    ChunkerME chunker = new ChunkerME(chunkerModel);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(wordnetDir);
    EventStream es = new AnswerTypeEventStream(eventFile, actg, parser);
    while (es.hasNext()) {
        System.out.println(es.next().toString());
    }
}
 
Example #2
Source File: JM_Scorer.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Scores how closely the parse structure of a passage matches that of the question.
 * <p>
 * The score is the sum of {@code matchChildren} over the paired top parses of the
 * question and the passage. A passage that does not contain the candidate answer
 * scores 0 without loading the parser model.
 *
 * @param ca      candidate answer text that must occur in the passage
 * @param q       question text
 * @param passage passage text
 * @param verbose not used by this method
 * @return the accumulated structure score, or 0 if the passage does not contain {@code ca}
 * @throws InvalidFormatException if the parser model file is malformed
 * @throws IOException            if the parser model file cannot be read
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException{
	// Nothing to score unless the candidate answer occurs in the passage; checking
	// first avoids loading the parser model for a result that is always 0.
	if (!passage.contains(ca)) {
		return 0;
	}

	// Only the parser is needed here. The stream is closed once the model is built.
	Parser parser;
	try (FileInputStream parserStream = new FileInputStream(new File("en-parser.bin"))) {
		parser = ParserFactory.create(new ParserModel(parserStream));
	}

	Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
	// Parse the passage itself (previously the question was parsed a second time).
	Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);

	// Pair parses index by index, bounded by the shorter array.
	int pairs = Math.min(questionParse.length, passageParse.length);
	double score = 0;
	for (int i = 0; i < pairs; i++) {
		score += matchChildren(questionParse[i], passageParse[i]);
	}
	return score;
}
 
Example #3
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Parses a fixed sample sentence (with {@code this.q} appended) and {@code this.q} on its
 * own, then prints the resulting parses followed by the grandchildren of the first
 * sample parse.
 *
 * @throws IOException declared by the original signature
 */
public void parserTest1() throws IOException {
	if (!this.modelsAreInitialized) {
		init();
	}

	final int beamSize = 20;
	final double advancePercentage = 0.95;
	Parser treeParser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

	String sample = "Jane Austen was very modest about her own genius ." + this.q;
	Parse[] sampleParses = ParserTool.parseLine(sample, treeParser, 1);
	Parse[] questionParses = ParserTool.parseLine(this.q, treeParser, 1);

	// Capture the grandchildren before the top node is expanded below.
	Parse[] topChildren = sampleParses[0].getChildren();
	Parse[] grandChildren = topChildren[0].getChildren();

	sampleParses[0].expandTopNode(sampleParses[0]);
	for (Parse parse : sampleParses) {
		parse.show();
	}
	for (Parse parse : questionParses) {
		parse.show();
	}
	System.out.print("\n\n");
	for (Parse node : grandChildren) {
		node.show();
		System.out.print("\n");
	}
}
 
Example #4
Source File: POSStructureScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Splits a passage into sentences and returns the top parse of each one.
 * <p>
 * Each sentence is tokenized with {@code SimpleTokenizer}, re-joined with single spaces,
 * echoed to standard output and parsed.
 *
 * @param p passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by the original signature
 */
public static Parse[] parsePassageText(String p) throws InvalidFormatException{
	final int beamSize = 20;
	final double advancePercentage = 0.95;

	SentenceDetectorME splitter = new SentenceDetectorME(sentenceModel);
	Parser treeParser = ParserFactory.create(parserModel, beamSize, advancePercentage);

	String[] sentences = splitter.sentDetect(p);
	Parse[] topParses = new Parse[sentences.length];
	int slot = 0;
	for (String sentence : sentences) {
		String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
		String spaced = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + spaced);
		Parse[] candidates = ParserTool.parseLine(spaced, treeParser, 1);
		topParses[slot] = candidates[0];
		slot++;
	}
	return topParses;
}
 
Example #5
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Parses a fixed sample sentence (with {@code this.q} appended) and {@code this.q} on its
 * own, then prints the resulting parses followed by the grandchildren of the first
 * sample parse.
 *
 * @throws IOException declared by the original signature
 */
public void parserTest1() throws IOException {
	if (!this.modelsAreInitialized) {
		init();
	}

	final int beamSize = 20;
	final double advancePercentage = 0.95;
	Parser treeParser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

	String sample = "Jane Austen was very modest about her own genius ." + this.q;
	Parse[] sampleParses = ParserTool.parseLine(sample, treeParser, 1);
	Parse[] questionParses = ParserTool.parseLine(this.q, treeParser, 1);

	// Capture the grandchildren before the top node is expanded below.
	Parse[] topChildren = sampleParses[0].getChildren();
	Parse[] grandChildren = topChildren[0].getChildren();

	sampleParses[0].expandTopNode(sampleParses[0]);
	for (Parse parse : sampleParses) {
		parse.show();
	}
	for (Parse parse : questionParses) {
		parse.show();
	}
	System.out.print("\n\n");
	for (Parse node : grandChildren) {
		node.show();
		System.out.print("\n");
	}
}
 
Example #6
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Splits a passage into sentences and returns the top parse of each one.
 * <p>
 * Models are initialised on first use. Each sentence is tokenized with
 * {@code SimpleTokenizer}, re-joined with single spaces, echoed to standard output
 * and parsed.
 *
 * @param p passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by the original signature
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized) {
		init();
	}

	final int beamSize = 20;
	final double advancePercentage = 0.95;
	SentenceDetectorME splitter = new SentenceDetectorME(this.sentenceModel);
	Parser treeParser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

	// Detect sentences, tokenize each, parse each, keep the top parse of each.
	String[] sentences = splitter.sentDetect(p);
	Parse[] topParses = new Parse[sentences.length];
	int slot = 0;
	for (String sentence : sentences) {
		// Several tokenizers are available; the original author found SimpleTokenizer works best.
		String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentence);
		String spaced = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + spaced);
		Parse[] candidates = ParserTool.parseLine(spaced, treeParser, 1);
		topParses[slot] = candidates[0];
		slot++;
	}
	return topParses;
}
 
Example #7
Source File: Chapter7.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 5 votes vote down vote up
/**
 * Loads the chunking parser model from the model directory, parses one sample
 * sentence and prints up to three parses of it. The model path is echoed first.
 * I/O failures are reported via a stack trace rather than propagated.
 */
private static void usingOpenNLP() {
    final String modelPath = getModelDir() + "/en-parser-chunking.bin";
    System.out.println(modelPath);
    try (InputStream modelStream = new FileInputStream(modelPath)) {
        Parser parser = ParserFactory.create(new ParserModel(modelStream));

        // This sentence demonstrates the difference between NER and the parser.
        // (An alternative sample is "The cow jumped over the moon".)
        final String sentence = "He was the last person to see Fred.";

        Parse[] candidates = ParserTool.parseLine(sentence, parser, 3);
        for (int i = 0; i < candidates.length; i++) {
            // Other ways to inspect a parse: showCodeTree(), or walking getChildren()
            // and getTagNodes() to print each node's text, type, probability and label.
            candidates[i].show();
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #8
Source File: WiseOwlQParserPlugin.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Creates the query parser for a request.
 * <p>
 * The answer-type code map is rebuilt on every call. When the OWL component is enabled
 * in {@code params} and the query is not the match-all query {@code *:*}, a
 * {@code WiseOwlQParser} is returned; otherwise parsing is delegated to the
 * {@code edismax} query plugin.
 */
@Override
public QParser createParser(String qStr, SolrParams localParams, SolrParams params,
                            SolrQueryRequest req) {
  // Single-letter answer-type codes mapped to the entity labels they stand for.
  answerTypeMap = new HashMap<String, String>();
  answerTypeMap.put("L", "NE_LOCATION");
  answerTypeMap.put("T", "NE_TIME|NE_DATE");
  answerTypeMap.put("P", "NE_PERSON");
  answerTypeMap.put("M", "NE_MONEY");
  answerTypeMap.put("O", "NE_ORGANIZATION");
  answerTypeMap.put("C", "NE_PERCENT");
  answerTypeMap.put("F", "DESCRIPTION");
  answerTypeMap.put("X", "OTHERS");

  boolean owlEnabled = params.getBool(OWLParams.COMPONENT_NAME, false);
  if (!owlEnabled || qStr.equals("*:*")) {
    // Just do a regular query if OWL is turned off (or for the match-all query).
    return req.getCore().getQueryPlugin("edismax")
            .createParser(qStr, localParams, params, req);
  }

  AnswerTypeClassifier classifier = new AnswerTypeClassifier(model, probs, atcg);
  Parser chunkParser = new ChunkParser(chunker, tagger);
  return new WiseOwlQParser(qStr, localParams,
          params, req, chunkParser, classifier, answerTypeMap);
}
 
Example #9
Source File: AnswerTypeClassifier.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Train the answer model
 * <p>
 * Hint:
 * <pre>
 *  mvn exec:java -Dexec.mainClass=com.tamingtext.qa.AnswerTypeClassifier \
 *    -Dexec.args="dist/data/questions-train.txt en-answer.bin" \
 *    -Dmodel.dir=../../opennlp-models \
 *    -Dwordnet.dir=../../Wordnet-3.0/dict
 *  </pre>
 * <p>
 * Both the {@code model.dir} and {@code wordnet.dir} system properties are required;
 * the program exits with an error message if either is missing.
 *
 * @param args {@code args[0]} is the training file, {@code args[1]} the output model file
 * @throws IOException if a model file or the training file cannot be read, or the
 *                     trained model cannot be written
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.err.println("Usage: AnswerTypeClassifier <trainFile> <modelFile>");
        System.exit(1);
    }

    String trainFile = args[0];
    File outFile = new File(args[1]);
    String modelsDirProp = System.getProperty("model.dir");
    String wordnetDir = System.getProperty("wordnet.dir");
    // Fail with a clear message instead of a NullPointerException from new File(null).
    if (modelsDirProp == null || wordnetDir == null) {
        System.err.println("Both -Dmodel.dir and -Dwordnet.dir system properties must be set");
        System.exit(1);
    }
    File modelsDir = new File(modelsDirProp);

    // The model constructors read the stream during construction, so each stream is
    // closed as soon as its model has been built instead of being leaked.
    ChunkerModel chunkerModel;
    try (InputStream chunkerStream = new FileInputStream(
            new File(modelsDir, "en-chunker.bin"))) {
        chunkerModel = new ChunkerModel(chunkerStream);
    }
    POSModel posModel;
    try (InputStream posStream = new FileInputStream(
            new File(modelsDir, "en-pos-maxent.bin"))) {
        posModel = new POSModel(posStream);
    }

    ChunkerME chunker = new ChunkerME(chunkerModel);
    POSTaggerME tagger = new POSTaggerME(posModel);
    Parser parser = new ChunkParser(chunker, tagger);
    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(new File(wordnetDir));
    //<start id="atc.train"/>
    AnswerTypeEventStream es = new AnswerTypeEventStream(trainFile,
            actg, parser);
    GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3));//<co id="atc.train.do"/>
    GISModelWriter writer = new SuffixSensitiveGISModelWriter(model, outFile);
    writer.persist();
    //new DoccatModel("en", model).serialize(new FileOutputStream(outFile));
/*
<calloutlist>
    <callout arearefs="atc.train.do"><para>Using the event stream, which feeds us training examples, do the actual training using OpenNLP's Maxent classifier.</para></callout>
</calloutlist>
*/
    //<end id="atc.train"/>
}
 
Example #10
Source File: WiseOwlQParser.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Creates a query parser that carries the tools used for answer-type handling.
 * The first four arguments are passed unchanged to the superclass constructor.
 *
 * @param qstr          query string
 * @param localParams   local parameters, forwarded to the superclass
 * @param params        request parameters, forwarded to the superclass
 * @param req           the request, forwarded to the superclass
 * @param parser        parser stored for later use by this instance
 * @param atc           answer type classifier stored for later use by this instance
 * @param answerTypeMap answer-type map stored as {@code atm}
 */
public WiseOwlQParser(
    String qstr,
    SolrParams localParams,
    SolrParams params,
    SolrQueryRequest req,
    Parser parser,
    AnswerTypeClassifier atc,
    Map<String,String> answerTypeMap) {
  super(qstr, localParams, params, req);
  this.atm = answerTypeMap;
  this.atc = atc;
  this.parser = parser;
}
 
Example #11
Source File: FocusNoun.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Prints the head-word ({@code hw=}) and modifier-word ({@code mw=}) context features
 * generated for a fixed sample question.
 * <p>
 * Both the {@code wordnet.dir} and {@code model.dir} system properties are required;
 * the program exits with an error message if either is missing.
 *
 * @param args not used
 * @throws IOException if a model file cannot be read
 */
public static void main(String[] args) throws IOException
{
	String wordnetDir = System.getProperty("wordnet.dir");
	String modelsDirProp = System.getProperty("model.dir");
	// Fail with a clear message instead of a NullPointerException from new File(null).
	if (wordnetDir == null || modelsDirProp == null) {
		System.err.println("Both -Dwordnet.dir and -Dmodel.dir system properties must be set");
		System.exit(1);
	}

	String question = "Who is Abraham Lincoln?";
	AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));
	File modelsDir = new File(modelsDirProp);

	// The model constructors read the stream during construction, so each stream is
	// closed as soon as its model has been built instead of being leaked.
	ChunkerModel chunkerModel;
	try (InputStream chunkerStream = new FileInputStream(
			new File(modelsDir, "en-chunker.bin"))) {
		chunkerModel = new ChunkerModel(chunkerStream);
	}
	POSModel posModel;
	try (InputStream posStream = new FileInputStream(
			new File(modelsDir, "en-pos-maxent.bin"))) {
		posModel = new POSModel(posStream);
	}

	ChunkerME chunker = new ChunkerME(chunkerModel);
	POSTaggerME tagger = new POSTaggerME(posModel);
	Parser parser = new ChunkParser(chunker, tagger);

	Parse query = ParserTool.parseLine(question, parser, 1)[0];
	String[] context = atcg.getContext(query);
	for (String feature : context) {
		if (feature.startsWith("hw=") || feature.startsWith("mw=")) {
			// Strip the three-character "hw=" / "mw=" prefix.
			System.out.println(feature.substring(3));
		}
	}
}
 
Example #12
Source File: FocusNoun.java    From wiseowl with MIT License 5 votes vote down vote up
/**
 * Returns the values of the first two head-word ({@code hw=}) or modifier-word
 * ({@code mw=}) context features generated for a question.
 * <p>
 * The WordNet dictionary directory is taken from the {@code wordnet.dir} system property
 * (default {@code WordNet-3.0/dict/}) and the OpenNLP model directory from
 * {@code model.dir} (default {@code opennlp-models/}).
 *
 * @param question question text to analyse
 * @return an array of length 2 holding the matching feature values in order of
 *         appearance; unused slots are {@code null}
 * @throws IOException if a model file cannot be read
 */
public String[] getFocusNoun(String question) throws IOException
{
	// Honour the system properties when set; otherwise fall back to the paths that
	// were previously hard-coded (which silently discarded the property values).
	String wordnetDir = System.getProperty("wordnet.dir", "WordNet-3.0/dict/");
	String modelsDirProp = System.getProperty("model.dir", "opennlp-models/");

	AnswerTypeContextGenerator atcg = new AnswerTypeContextGenerator(new File(wordnetDir));
	File modelsDir = new File(modelsDirProp);

	// The model constructors read the stream during construction, so each stream is
	// closed as soon as its model has been built instead of being leaked.
	ChunkerModel chunkerModel;
	try (InputStream chunkerStream = new FileInputStream(
			new File(modelsDir, "en-chunker.bin"))) {
		chunkerModel = new ChunkerModel(chunkerStream);
	}
	POSModel posModel;
	try (InputStream posStream = new FileInputStream(
			new File(modelsDir, "en-pos-maxent.bin"))) {
		posModel = new POSModel(posStream);
	}

	ChunkerME chunker = new ChunkerME(chunkerModel);
	POSTaggerME tagger = new POSTaggerME(posModel);
	Parser parser = new ChunkParser(chunker, tagger);

	Parse query = ParserTool.parseLine(question, parser, 1)[0];
	String[] context = atcg.getContext(query);
	String[] focus = new String[2];
	int p = 0;
	// Stop once both slots are filled: a third matching feature used to overflow the
	// array with an ArrayIndexOutOfBoundsException.
	for (int i = 0; i < context.length && p < focus.length; i++)
	{
		if (context[i].startsWith("hw=") || context[i].startsWith("mw="))
		{
			// Strip the three-character "hw=" / "mw=" prefix.
			focus[p++] = context[i].substring(3);
		}
	}
	return focus;
}
 
Example #13
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Splits a passage into sentences, prints the named entities found in each sentence,
 * and returns the top parse of each sentence.
 * <p>
 * Models are initialised on first use. Each sentence is tokenized with
 * {@code SimpleTokenizer}; its tokens are re-joined with single spaces, echoed to
 * standard output and parsed.
 *
 * @param p passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by the original signature
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized)init();
	//initialize
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	NameFinderME nameFinder = new NameFinderME(this.nerModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); // advance percentage
	//There are several tokenizers available. SimpleTokenizer works best
	Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

	//find sentences, tokenize each, parse each, return top parse for each
	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i=0;i<sentences.length;i++){
		// Tokenize only the current sentence. The previous inner loop re-scanned every
		// sentence on each iteration, repeating the name output once per sentence.
		Span[] tokenSpans = tokenizer.tokenizePos(sentences[i]);
		String[] tokens = Span.spansToStrings(tokenSpans, sentences[i]);

		// Name spans index into the token array; map them back to character offsets
		// in the sentence to recover the surface text of each name.
		Span[] names = nameFinder.find(tokens);
		for (int ni = 0; ni < names.length; ni++) {
			int nameStart = tokenSpans[names[ni].getStart()].getStart();
			int nameEnd = tokenSpans[names[ni].getEnd() - 1].getEnd();
			System.out.println(sentences[i].substring(nameStart, nameEnd));
		}

		// Join the sentence's tokens (previously the Tokenizer object itself was
		// joined, so the parser never saw the sentence text).
		String sent= StringUtils.join(tokens," ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent,parser, 1);
		results[i]=sentResults[0];
	}
	return results;
}
 
Example #14
Source File: AnswerTypeEventStream.java    From wiseowl with MIT License 4 votes vote down vote up
/**
 * Creates an event stream over the given file by delegating to the four-argument
 * constructor with {@code null} as its second argument.
 *
 * @param fileName file to read events from
 * @param atcg     context generator passed through to the four-argument constructor
 * @param parser   parser passed through to the four-argument constructor
 * @throws IOException declared by the signature; presumably raised when the file
 *                     cannot be opened — confirm against the four-argument constructor
 */
public AnswerTypeEventStream(String fileName, AnswerTypeContextGenerator atcg, Parser parser) throws IOException {
    // NOTE(review): the null second argument looks like an optional setting (e.g. an
    // encoding) left at its default — confirm against the four-argument constructor.
    this(fileName, null, atcg, parser);
}
 
Example #15
Source File: KensNLPScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Scores a passage by the ratio A/B, where A is the number of (question NP, passage NP)
 * pairs that are equal and B is the size of the passage NP collection.
 * <p>
 * {@code questionNPs} and {@code passageNPs} are declared outside this method and are
 * filled via {@code navigateTree}; this method does not reset them.
 *
 * @param q question phrase; its {@code text} is parsed
 * @param a candidate answer (not used by this method)
 * @param p passage, split into sentences that are parsed one by one
 * @return A/B, or 0 when the passage NP collection is empty
 */
@Override
public double scorePassage(Phrase q, Answer a, Passage p) {
	int matchingPairs = 0;
	try {
		// Prepare the NLP tools.
		if (!this.modelsAreInitialized) {
			init();
		}
		final int beamSize = 20;
		final double advancePercentage = 0.95;
		Parser treeParser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

		// Parse the question.
		Parse[] questionParse = ParserTool.parseLine(q.text, treeParser, 1);

		// Parse the passage, keeping the top parse of every sentence.
		String[] sentences = this.DivideIntoSentences(p);
		Parse[] passageParses = new Parse[sentences.length];
		for (int s = 0; s < sentences.length; s++) {
			Parse[] candidates = ParserTool.parseLine(sentences[s], treeParser, 1);
			passageParses[s] = candidates[0];
		}

		// Collect NPs from the question parse, then from each passage parse.
		navigateTree(questionParse, 0, questionNPs);
		for (int s = 0; s < passageParses.length; s++) {
			navigateTree(passageParses, s, passageNPs);
		}

		// A: count every question NP / passage NP pair that is equal.
		for (String questionNP : questionNPs) {
			for (String passageNP : passageNPs) {
				if (questionNP.equals(passageNP)) {
					matchingPairs++;
				}
			}
		}
	} catch (InvalidFormatException e) {
		e.printStackTrace();
	}

	// B is the passage NP count; return A/B, guarding against division by zero.
	return passageNPs.size() == 0
			? 0
			: (double) matchingPairs / passageNPs.size();
}