opennlp.tools.util.InvalidFormatException Java Examples

The following examples show how to use opennlp.tools.util.InvalidFormatException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LemmatizerFactory.java    From ixa-pipe-pos with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link LemmatizerFactory}: the default one, or a named extension subclass.
 *
 * @param subclassName name of the factory subclass handed to {@code ExtensionLoader};
 *        when {@code null} a plain {@code LemmatizerFactory} is returned
 * @return the instantiated factory
 * @throws InvalidFormatException if instantiating the named extension raises any
 *         exception; that exception is attached as the cause
 */
public static LemmatizerFactory create(String subclassName)
    throws InvalidFormatException {
  if (subclassName != null) {
    try {
      return ExtensionLoader.instantiateExtension(LemmatizerFactory.class, subclassName);
    } catch (Exception cause) {
      // report on stderr, then surface the failure to the caller with its cause
      final String errorText = "Could not instantiate the " + subclassName
          + ". The initialization throw an exception.";
      System.err.println(errorText);
      cause.printStackTrace();
      throw new InvalidFormatException(errorText, cause);
    }
  }
  // no subclass requested: fall back to the default factory
  return new LemmatizerFactory();
}
 
Example #2
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Splits a passage into sentences and returns the first parse produced for each one.
 *
 * Each sentence is tokenized with {@code SimpleTokenizer}, re-joined with single
 * spaces, echoed to stdout and then parsed.
 *
 * @param p the passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by this method; presumably raised by
 *         {@code init()} when a model cannot be loaded (verify against init)
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	// load the models lazily on first use
	if (!modelsAreInitialized) {
		init();
	}
	SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	final int beamSize = 20;
	final double advancePercentage = 0.95;
	Parser parser = ParserFactory.create(this.parserModel, beamSize, advancePercentage);

	String[] sentences = detector.sentDetect(p);
	Parse[] topParses = new Parse[sentences.length];
	int slot = 0;
	for (String sentence : sentences) {
		// normalise spacing: tokens separated by exactly one blank
		String spaced = StringUtils.join(SimpleTokenizer.INSTANCE.tokenize(sentence), " ");
		System.out.println("Found sentence " + spaced);
		// only one parse is requested, so element 0 is the one kept
		topParses[slot++] = ParserTool.parseLine(spaced, parser, 1)[0];
	}
	return topParses;
}
 
Example #3
Source File: JM_Scorer.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Scores the structural similarity between the parse of the question and the
 * parse of the passage, but only when the passage contains the candidate answer.
 *
 * The parser model is read from {@code en-parser.bin} in the working directory on
 * every call.
 *
 * @param ca candidate answer text; the score stays 0 unless {@code passage} contains it
 * @param q question text
 * @param passage passage text
 * @param verbose not used by this method
 * @return the sum of {@code matchChildren} over paired question/passage parses,
 *         or 0 when the passage does not contain the candidate answer
 * @throws InvalidFormatException if the parser model file is malformed
 * @throws IOException if the parser model file cannot be read
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException{
	// Only the parser model is needed for scoring; close the stream once it is loaded.
	Parser parser;
	FileInputStream modelStream = new FileInputStream(new File("en-parser.bin"));
	try {
		parser = ParserFactory.create(new ParserModel(modelStream));
	} finally {
		modelStream.close();
	}

	Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
	// the passage parse must come from the passage, not from the question again
	Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);

	double score = 0;
	if (passage.contains(ca)) {
		// pair parses index by index; never read past the shorter array
		int pairs = Math.min(questionParse.length, passageParse.length);
		for (int i = 0; i < pairs; i++) {
			score += matchChildren(questionParse[i], passageParse[i]);
		}
	}

	return score;
}
 
Example #4
Source File: POSStructureScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Detects the sentences of a passage and parses each of them, keeping the first
 * parse returned per sentence.
 *
 * Uses the {@code sentenceModel} and {@code parserModel} referenced from the
 * enclosing scope; each space-joined sentence is printed to stdout before parsing.
 *
 * @param p the passage text
 * @return one parse per detected sentence, in order
 * @throws InvalidFormatException declared by the signature
 */
public static Parse[] parsePassageText(String p) throws InvalidFormatException{
	SentenceDetectorME splitter = new SentenceDetectorME(sentenceModel);
	// beam size 20, advance percentage 0.95
	Parser parser = ParserFactory.create(parserModel, 20, 0.95);

	String[] detected = splitter.sentDetect(p);
	int count = detected.length;
	Parse[] parses = new Parse[count];
	for (int idx = 0; idx < count; idx++) {
		String[] tokens = SimpleTokenizer.INSTANCE.tokenize(detected[idx]);
		String line = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + line);
		// a single parse is requested; keep it
		parses[idx] = ParserTool.parseLine(line, parser, 1)[0];
	}
	return parses;
}
 
Example #5
Source File: LemmatizerModel.java    From ixa-pipe-pos with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the superclass validation and then checks that the lemmatizer model
 * entry of the artifact map is an {@code AbstractModel}.
 *
 * @throws InvalidFormatException if the entry is missing or of another type
 */
@Override
protected void validateArtifactMap() throws InvalidFormatException {
  super.validateArtifactMap();

  final Object lemmatizerEntry = artifactMap.get(LEMMATIZER_MODEL_ENTRY_NAME);
  if (lemmatizerEntry instanceof AbstractModel) {
    return;
  }
  throw new InvalidFormatException("Lemmatizer model is incomplete!");
}
 
Example #6
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Length-normalised structure score: the product of the question-vs-passage and
 * answer-vs-passage chunk comparisons, divided by the passage length in characters.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return the normalised score, or 0 for an empty passage
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructureNorm(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	Parse[] caParse = this.parsePassageText(ca);
	Parse[] qParse = this.parsePassageText(q);
	Parse[] pasParse = this.parsePassageText(passage);
	Parse[] caParseCh = getAllChildren(caParse);
	Parse[] qParseCh = getAllChildren(qParse);
	Parse[] pasParseCh = getAllChildren(pasParse);
	double score1 = compareParseChunks(qParseCh, pasParseCh, verbose);
	double score2 = compareParseChunks(caParseCh, pasParseCh, verbose);
	int passageLength = passage.length();
	if (passageLength == 0) {
		// dividing by zero would produce NaN or Infinity; an empty passage scores 0
		return 0;
	}
	return score1 * score2 / passageLength;
}
 
Example #7
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Structure score: the question-vs-passage chunk comparison multiplied by the
 * answer-vs-passage chunk comparison.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return the product of the two comparison scores
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	// parse the three texts in the order answer, question, passage
	Parse[] answerSentences = this.parsePassageText(ca);
	Parse[] questionSentences = this.parsePassageText(q);
	Parse[] passageSentences = this.parsePassageText(passage);

	Parse[] answerNodes = getAllChildren(answerSentences);
	Parse[] questionNodes = getAllChildren(questionSentences);
	Parse[] passageNodes = getAllChildren(passageSentences);

	final double questionVsPassage = compareParseChunks(questionNodes, passageNodes, verbose);
	final double answerVsPassage = compareParseChunks(answerNodes, passageNodes, verbose);
	return questionVsPassage * answerVsPassage;
}
 
Example #8
Source File: OpenNlpTests.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs sentence detection over the given text and prints every detected
 * sentence to stderr, one per line, prefixed with {@code "sent: "}.
 *
 * @param testSents text to split into sentences
 * @throws InvalidFormatException declared by the signature; presumably raised by
 *         {@code init()} (verify against init)
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
	for (String sentence : detector.sentDetect(testSents)) {
		System.err.println("sent: " + sentence);
	}
}
 
Example #9
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Length-normalised structure score: the product of the question-vs-passage and
 * answer-vs-passage chunk comparisons, divided by the passage length in characters.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return the normalised score, or 0 for an empty passage
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructureNorm(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	Parse[] caParse = this.parsePassageText(ca);
	Parse[] qParse = this.parsePassageText(q);
	Parse[] pasParse = this.parsePassageText(passage);
	Parse[] caParseCh = getAllChildren(caParse);
	Parse[] qParseCh = getAllChildren(qParse);
	Parse[] pasParseCh = getAllChildren(pasParse);
	double score1 = compareParseChunks(qParseCh, pasParseCh, verbose);
	double score2 = compareParseChunks(caParseCh, pasParseCh, verbose);
	int passageLength = passage.length();
	if (passageLength == 0) {
		// dividing by zero would produce NaN or Infinity; an empty passage scores 0
		return 0;
	}
	return score1 * score2 / passageLength;
}
 
Example #10
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Multiplies the question-vs-passage chunk comparison by the answer-vs-passage
 * chunk comparison and returns the result.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseChunks}
 * @return the product of the two comparison scores
 * @throws InvalidFormatException propagated from {@code parsePassageText}
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	// sentence-level parses, produced in the order answer, question, passage
	Parse[] answerParses = this.parsePassageText(ca);
	Parse[] questionParses = this.parsePassageText(q);
	Parse[] passageParses = this.parsePassageText(passage);

	// flatten each set of parses into its child nodes
	Parse[] answerChildren = getAllChildren(answerParses);
	Parse[] questionChildren = getAllChildren(questionParses);
	Parse[] passageChildren = getAllChildren(passageParses);

	double questionScore = compareParseChunks(questionChildren, passageChildren, verbose);
	double answerScore = compareParseChunks(answerChildren, passageChildren, verbose);
	return questionScore * answerScore;
}
 
Example #11
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Splits a passage into sentences, prints the named entities found in each
 * sentence, and returns the first parse produced for each sentence.
 *
 * Every sentence is tokenized once with {@code SimpleTokenizer}; the tokens feed
 * both the name finder and, joined with single spaces, the parser.
 *
 * @param p the passage text
 * @return one parse per detected sentence, in sentence order
 * @throws InvalidFormatException declared by this method; presumably raised by
 *         {@code init()} when a model cannot be loaded (verify against init)
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
	if (!modelsAreInitialized)init();
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	NameFinderME nameFinder = new NameFinderME(this.nerModel);
	Parser parser = ParserFactory.create(
			this.parserModel,
			20, // beam size
			0.95); // advance percentage
	Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

	String[] sentences = sentenceDetector.sentDetect(p);
	Parse[] results = new Parse[sentences.length];
	for (int i=0;i<sentences.length;i++){
		String sentence = sentences[i];
		Span[] tokenSpans = tokenizer.tokenizePos(sentence);
		String[] tokens = Span.spansToStrings(tokenSpans, sentence);

		// Report the entities of THIS sentence only; name spans are token indices,
		// so map them back to character offsets through the token spans.
		Span[] names = nameFinder.find(tokens);
		for (int ni = 0; ni < names.length; ni++) {
			int nameStart = tokenSpans[names[ni].getStart()].getStart();
			int nameEnd = tokenSpans[names[ni].getEnd() - 1].getEnd();
			System.out.println(sentence.substring(nameStart, nameEnd));
		}

		// Join the sentence's tokens (not the tokenizer object) for the parser.
		String sent = StringUtils.join(tokens, " ");
		System.out.println("Found sentence " + sent);
		Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
		results[i] = sentResults[0];
	}
	return results;
}
 
Example #12
Source File: NERScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Splits the given text into sentences and writes each one to stderr with the
 * prefix {@code "sent: "}.
 *
 * @param testSents text to run the sentence detector on
 * @throws InvalidFormatException declared by the signature; presumably raised by
 *         {@code init()} (verify against init)
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	String[] found = new SentenceDetectorME(this.sentenceModel).sentDetect(testSents);
	for (String sentence : found) {
		System.err.println("sent: " + sentence);
	}
}
 
Example #13
Source File: PassageScorerOpenNLPAda.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Length-normalised structure score: the product of the question-vs-passage and
 * answer-vs-passage parse-type comparisons, divided by the passage length in
 * characters. Parsing and child extraction are delegated to {@code t}.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseType}
 * @return the normalised score, or 0 for an empty passage
 * @throws InvalidFormatException propagated from {@code t.parsePassageText}
 */
public double scoreStructureNorm(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	Parse[] caParse = t.parsePassageText(ca);
	Parse[] qParse = t.parsePassageText(q);
	Parse[] pasParse = t.parsePassageText(passage);
	Parse[] caParseCh = t.getAllChildren(caParse);
	Parse[] qParseCh = t.getAllChildren(qParse);
	Parse[] pasParseCh = t.getAllChildren(pasParse);
	double score1 = this.compareParseType(qParseCh, pasParseCh, verbose);
	double score2 = this.compareParseType(caParseCh, pasParseCh, verbose);
	int passageLength = passage.length();
	if (passageLength == 0) {
		// dividing by zero would produce NaN or Infinity; an empty passage scores 0
		return 0;
	}
	return score1 * score2 / passageLength;
}
 
Example #14
Source File: PassageScorerOpenNLPAda.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Structure score: the question-vs-passage parse-type comparison multiplied by
 * the answer-vs-passage parse-type comparison. Parsing and child extraction are
 * delegated to {@code t}.
 *
 * @param ca candidate answer text
 * @param q question text
 * @param passage passage text
 * @param verbose forwarded to {@code compareParseType}
 * @return the product of the two comparison scores
 * @throws InvalidFormatException propagated from {@code t.parsePassageText}
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException{
	// parse in the order answer, question, passage
	Parse[] answerParses = t.parsePassageText(ca);
	Parse[] questionParses = t.parsePassageText(q);
	Parse[] passageParses = t.parsePassageText(passage);

	Parse[] answerNodes = t.getAllChildren(answerParses);
	Parse[] questionNodes = t.getAllChildren(questionParses);
	Parse[] passageNodes = t.getAllChildren(passageParses);

	final double questionMatch = this.compareParseType(questionNodes, passageNodes, verbose);
	final double answerMatch = this.compareParseType(answerNodes, passageNodes, verbose);
	return questionMatch * answerMatch;
}
 
Example #15
Source File: StephensonOpenNLPScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Detects sentences in the given text and prints each to stderr as
 * {@code "sent: <sentence>"}.
 *
 * @param testSents text to split into sentences
 * @throws InvalidFormatException declared by the signature; presumably raised by
 *         {@code init()} (verify against init)
 */
public void testSentDetector(String testSents) throws InvalidFormatException{
	init();
	SentenceDetectorME splitter = new SentenceDetectorME(this.sentenceModel);
	String[] pieces = splitter.sentDetect(testSents);
	int index = 0;
	while (index < pieces.length) {
		System.err.println("sent: " + pieces[index]);
		index++;
	}
}
 
Example #16
Source File: LemmatizerModel.java    From ixa-pipe-pos with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a lemmatizer model from a URL by passing {@code COMPONENT_NAME} and
 * the URL to the superclass constructor.
 *
 * @param modelURL location handed to the superclass constructor
 * @throws IOException declared by the signature (thrown by the superclass constructor)
 * @throws InvalidFormatException declared by the signature (thrown by the superclass constructor)
 */
public LemmatizerModel(URL modelURL) throws IOException, InvalidFormatException {
  super(COMPONENT_NAME, modelURL);
}
 
Example #17
Source File: LemmatizerModel.java    From ixa-pipe-pos with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a lemmatizer model from a file by passing {@code COMPONENT_NAME} and
 * the file to the superclass constructor.
 *
 * @param modelFile file handed to the superclass constructor
 * @throws IOException declared by the signature (thrown by the superclass constructor)
 * @throws InvalidFormatException declared by the signature (thrown by the superclass constructor)
 */
public LemmatizerModel(File modelFile) throws IOException, InvalidFormatException {
  super(COMPONENT_NAME, modelFile);
}
 
Example #18
Source File: POSStructureScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Demo entry point: scores a sample question/answer pair against every sentence
 * of a sample passage and prints the per-sentence and accumulated scores.
 *
 * A sentence contributes its score to the total only when that score does not
 * exceed 10% of the sentence's character length.
 *
 * @param args unused
 * @throws InvalidFormatException declared by the signature; propagated from
 *         {@code parsePassageText} and presumably {@code init()}
 */
public static void main(String[] args) throws InvalidFormatException {
	init();

	String sampleQuestion = "Jane Austen";
	String sampleAnswer = "Jane Austen wrote Emma";
	String samplePassage = "Jane Austen was very modest about her own genius.[7] She once famously described her work as "+
			"the little bit (two Inches wide) of Ivory, on which I work with so fine a brush, " +
			"as produces little effect after much labor [7]. " +
			// trailing space keeps this sentence from fusing with the next one
			"Jane Austen wrote Emma. " +
			"When she was a girl she wrote stories. Her works were printed only after much revision. " +
			"Only four of her novels were printed while she was alive. They were Sense and Sensibility (1811), " +
			"Pride and Prejudice (1813), Mansfield Park (1814) and Emma (1816). " +
			"Two other novels, Northanger Abbey and Persuasion, were printed in 1817 with " +
			"a biographical notice by her brother, Henry Austen. Persuasion was written shortly before her death. " +
			"She also wrote two earlier works, Lady Susan, and an unfinished novel, The Watsons. " +
			"She had been working on a new novel, Sanditon, but she died before she could finish it.";

	// separate answer and question so their boundary words stay distinct tokens
	String sampleQACombined = sampleAnswer + " " + sampleQuestion;
	Parse[] sentences = parsePassageText(samplePassage);

	int[] scorerModelQA = POSScoreSentece(sampleQACombined);
	double finalScore = 0;
	for (int i = 0; i < sentences.length; i++) {
		String sentenceText = sentences[i].toString();
		int[] scorerModelEachSentenceInPassage = POSScoreSentece(sentenceText);
		double tempScore = AbsoluteScorerModelSubtractor(scorerModelQA,scorerModelEachSentenceInPassage);
		System.out.println("tempScore = "+tempScore);
		// keep only scores within 10% of the sentence length
		if (tempScore <= 0.1*sentenceText.length()) {
			finalScore = finalScore + tempScore;
		}
	}

	System.out.println("Final Score is : " + finalScore);
}
 
Example #19
Source File: LemmatizerModel.java    From ixa-pipe-pos with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a lemmatizer model from an input stream by passing
 * {@code COMPONENT_NAME} and the stream to the superclass constructor.
 *
 * @param in stream handed to the superclass constructor
 * @throws IOException declared by the signature (thrown by the superclass constructor)
 * @throws InvalidFormatException declared by the signature (thrown by the superclass constructor)
 */
public LemmatizerModel(InputStream in) throws IOException, InvalidFormatException {
  super(COMPONENT_NAME, in);
}
 
Example #20
Source File: LemmatizerFactory.java    From ixa-pipe-pos with Apache License 2.0 4 votes vote down vote up
/**
 * Validates the artifact map. The body is intentionally empty: this factory
 * has no additional artifacts to check.
 *
 * @throws InvalidFormatException declared by the signature; never thrown by this body
 */
@Override
public void validateArtifactMap() throws InvalidFormatException {
  // no additional artifacts
}
 
Example #21
Source File: LexicalLibOpenNlpImplTest.java    From SciGraph with Apache License 2.0 4 votes vote down vote up
/**
 * One-time test fixture: builds a Guice injector from {@code LexicalLibModule}
 * and {@code OpenNlpModule} and stores the {@code LexicalLibOpenNlpImpl} it
 * provides in {@code lexLib}.
 *
 * @throws InvalidFormatException declared by the signature
 * @throws IOException declared by the signature
 */
@BeforeClass
public static void setup() throws InvalidFormatException, IOException {
  lexLib = Guice.createInjector(new LexicalLibModule(), new OpenNlpModule())
      .getInstance(LexicalLibOpenNlpImpl.class);
}
 
Example #22
Source File: KensNLPScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Scores a passage as the number of question/passage noun-phrase matches
 * divided by the number of passage noun phrases.
 *
 * The question and every passage sentence are parsed; {@code navigateTree}
 * is then called to fill {@code questionNPs} and {@code passageNPs}. Every
 * (question NP, passage NP) pair with equal strings counts as one match.
 * NOTE(review): {@code questionNPs} and {@code passageNPs} are declared outside
 * this method and are not cleared here - confirm whether they accumulate
 * across calls.
 *
 * @param q the question; its {@code text} is parsed
 * @param a the answer; not used by this method
 * @param p the passage, split into sentences before parsing
 * @return matches divided by {@code passageNPs.size()}, or 0 when that size is 0
 */
@Override
public double scorePassage(Phrase q, Answer a, Passage p) {

	int matches = 0;
	try {
		// make sure the NLP models are loaded
		if (!this.modelsAreInitialized) {
			init();
		}
		Parser parser = ParserFactory.create(this.parserModel, 20, 0.95);

		// parse of the question
		Parse[] questionParse = ParserTool.parseLine(q.text, parser, 1);

		// first parse of each passage sentence
		String[] sentences = this.DivideIntoSentences(p);
		Parse[] sentenceParses = new Parse[sentences.length];
		int slot = 0;
		for (String sentence : sentences) {
			sentenceParses[slot++] = ParserTool.parseLine(sentence, parser, 1)[0];
		}

		// collect the noun phrases of the question, then of each passage sentence
		navigateTree(questionParse, 0, questionNPs);
		for (int idx = 0; idx < sentenceParses.length; idx++) {
			navigateTree(sentenceParses, idx, passageNPs);
		}

		// count every equal (question NP, passage NP) pair
		for (String questionNP : questionNPs) {
			for (String passageNP : passageNPs) {
				if (questionNP.equals(passageNP)) {
					matches++;
				}
			}
		}
	} catch (InvalidFormatException e) {
		// best effort: report and fall through to score with what was collected
		e.printStackTrace();
	}

	// ratio of matches to passage NPs; guard the empty case
	return passageNPs.size() == 0 ? 0 : (double) matches / passageNPs.size();
}
 
Example #23
Source File: KensNLPScorer.java    From uncc2014watsonsim with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Splits the text of a passage into sentences with the sentence detector model.
 *
 * @param p the passage whose {@code text} is split
 * @return the detected sentences, in order
 * @throws InvalidFormatException declared by the signature; presumably raised by
 *         {@code init()} when a model cannot be loaded (verify against init)
 */
public String[] DivideIntoSentences(Passage p) throws InvalidFormatException{
	// Initialise only when needed, as scorePassage does, so that the models are
	// not set up again on every call.
	if (!this.modelsAreInitialized) {
		init();
	}
	SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
	return sentenceDetector.sentDetect(p.text);
}