org.grobid.core.GrobidModels Java Examples

The following examples show how to use org.grobid.core.GrobidModels. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NERFrParser.java    From grobid-ner with Apache License 2.0 6 votes vote down vote up
/**
 * Recognises the named entities found in a sequence of LayoutToken taken
 * from a document with fixed/preserved layout, e.g. a PDF.
 * The same tokens are passed on to the result extraction, so that the
 * positions of the recognised entities relate to the input document.
 *
 * @param tokens the layout tokens to analyse
 * @return the extracted entities, or null when {@code tokens} is null
 */
public List<Entity> extractNE(List<LayoutToken> tokens) {
    if (tokens == null) {
        return null;
    }

    // compute the lexicon position indexes for this token sequence
    LexiconPositionsIndexes lexiconIndexes = new LexiconPositionsIndexes(lexicon);
    lexiconIndexes.computeIndexes(tokens);

    // build the feature vectors from the tokens, then label them
    String labelled = label(NERParserCommon.toFeatureVectorLayout(tokens, lexiconIndexes));

    // note: sense tagging of the recognised entities and the merge of the
    // senses into the entities are not performed here
    return nerParserCommon.resultExtraction(GrobidModels.ENTITIES_NERFR, labelled, tokens);
}
 
Example #2
Source File: NEREnParser.java    From grobid-ner with Apache License 2.0 6 votes vote down vote up
/**
 * Recognises the named entities found in a sequence of LayoutToken taken
 * from a document with fixed/preserved layout, e.g. a PDF.
 * The same tokens are passed on to the result extraction, so that the
 * positions of the recognised entities relate to the input document.
 *
 * @param tokens the layout tokens to analyse
 * @return the extracted entities, or null when {@code tokens} is null
 */
public List<Entity> extractNE(List<LayoutToken> tokens) {
    if (tokens == null) {
        return null;
    }

    // compute the lexicon position indexes for this token sequence
    LexiconPositionsIndexes lexiconIndexes = new LexiconPositionsIndexes(lexicon);
    lexiconIndexes.computeIndexes(tokens);

    // build the feature vectors from the tokens, then label them
    String labelled = label(NERParserCommon.toFeatureVectorLayout(tokens, lexiconIndexes));

    // note: sense tagging of the recognised entities and the merge of the
    // senses into the entities are not performed here
    return nerParserCommon.resultExtraction(GrobidModels.ENTITIES_NER, labelled, tokens);
}
 
Example #3
Source File: NERParserCommonTest.java    From grobid-ner with Apache License 2.0 6 votes vote down vote up
/**
 * Checks result extraction on a labelled sequence in which "Austria" carries
 * a B-LOCATION label and the following "Hungary" a plain LOCATION label:
 * two entities are expected, the first one spanning "Austria Hungary".
 */
@Test
public void testresultExtraction_clusteror_simple2() throws Exception {
    final String input = "Austria Hungary fought the enemies with Germany.";

    // one line per token: features separated by tabs, label in last position
    final String labelledOutput = "Austria\taustria\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tB-LOCATION\n" +
            "Hungary\thungary\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tLOCATION\n" +
            "fought\tfought\tf\tfo\tfou\tfoug\tfough\tt\tht\tght\tught\tought\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "the\tthe\tt\tth\tthe\tthe\tthe\te\the\tthe\tthe\tthe\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxx\tx\t0\tO\n" +
            "enemies\tenemies\te\ten\tene\tenem\tenemi\ts\tes\ties\tmies\temies\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "with\twith\tw\twi\twit\twith\twith\th\tth\tith\twith\twith\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "Germany\tgermany\tG\tGe\tGer\tGerm\tGerma\ty\tny\tany\tmany\trmany\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\tXxxx\tXx\t0\tB-LOCATION\n" +
            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t.\t.\t0\tO";
    final List<LayoutToken> layoutTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);

    final List<Entity> entities = target.resultExtraction(GrobidModels.ENTITIES_NER, labelledOutput, layoutTokens);

    assertThat(entities, hasSize(2));

    // only the first entity is inspected in detail
    final Entity first = entities.get(0);
    assertThat(first.getRawName(), is("Austria Hungary"));
    assertThat(first.getType(), is(LOCATION));
    assertThat(first.getOffsetStart(), is(0));
    assertThat(first.getOffsetEnd(), is(15));
    // the offsets must select the raw name from the original input
    assertThat(input.substring(first.getOffsetStart(), first.getOffsetEnd()), is("Austria Hungary"));
}
 
Example #4
Source File: NERFrenchTrainer.java    From grobid-ner with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the trainer for the {@code GrobidModels.ENTITIES_NERFR} model,
 * sets the CRF training parameters used for it and reads the Le Monde corpus
 * path from the grobid-ner property file.
 *
 * @throws GrobidResourceException if the property file cannot be opened,
 *                                 read or closed
 */
public NERFrenchTrainer() {
    super(GrobidModels.ENTITIES_NERFR);

    // adjusting CRF training parameters for this model
    this.epsilon = 0.000001;
    this.window = 20;
    this.nbMaxIterations = 1000;

    // read additional properties for this sub-project to get the paths to the resources
    Properties prop = new Properties();

    // try-with-resources closes the stream on every path; a failure while
    // closing is reported through the catch below instead of being printed
    // to stderr and otherwise ignored
    try (InputStream input = new FileInputStream("src/main/resources/grobid-ner.properties")) {
        // load the properties file
        prop.load(input);

        // get the property value
        leMondeCorpusPath = prop.getProperty("grobid.ner.leMondeCorpus.path");
    } catch (IOException ex) {
        throw new GrobidResourceException(
                "An exception occured when accessing/reading the grobid-ner property file.", ex);
    }
}
 
Example #5
Source File: SenseTrainer.java    From grobid-ner with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the trainer for the {@code GrobidModels.ENTITIES_NERSense} model,
 * initialises the (empty) description map and reads the corpus paths from
 * the grobid-ner property file.
 *
 * @throws GrobidResourceException if the property file cannot be opened,
 *                                 read or closed
 */
public SenseTrainer() {
    super(GrobidModels.ENTITIES_NERSense);
    descriptions = new TreeMap<String, String>();

    // we read first the module specific property file to get the paths to the resources
    Properties prop = new Properties();

    // try-with-resources closes the stream on every path; a failure while
    // closing is reported through the catch below instead of being printed
    // to stderr and otherwise ignored
    try (InputStream input = new FileInputStream("src/main/resources/grobid-ner.properties")) {
        // load the properties file
        prop.load(input);

        // get the property values
        reutersPath = prop.getProperty("grobid.ner.reuters.paths");
        conllPath = prop.getProperty("grobid.ner.reuters.conll_path");
        idiliaPath = prop.getProperty("grobid.ner.reuters.idilia_path");
        nerCorpusPath = prop.getProperty("grobid.ner.extra_corpus");
    } catch (IOException ex) {
        throw new GrobidResourceException(
                "An exception occured when accessing/reading the grobid-ner property file.", ex);
    }
}
 
Example #6
Source File: NERTrainer.java    From grobid-ner with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the trainer for the {@code GrobidModels.ENTITIES_NER} model, sets
 * the CRF training parameters used for it and reads the corpus paths from
 * the grobid-ner property file.
 *
 * @throws GrobidResourceException if the property file cannot be opened or read
 */
public NERTrainer() {
    super(GrobidModels.ENTITIES_NER);

    // CRF training parameters specific to this model
    this.epsilon = 0.000001;
    this.window = 20;
    this.nbMaxIterations = 200;

    // the sub-project property file holds the paths to the training resources
    InputStream propertyStream = null;
    Properties settings = new Properties();
    try {
        propertyStream = new FileInputStream("src/main/resources/grobid-ner.properties");
        settings.load(propertyStream);

        // copy the configured paths into the trainer
        reutersPath = settings.getProperty("grobid.ner.reuters.paths");
        idiliaPath = settings.getProperty("grobid.ner.reuters.idilia_path");
        nerCorpusPath = settings.getProperty("grobid.ner.extra_corpus");
    } catch (IOException ioFailure) {
        throw new GrobidResourceException(
                "An exception occured when accessing/reading the grobid-ner property file.", ioFailure);
    } finally {
        // closed on every path, including when opening the file failed
        IOUtils.closeQuietly(propertyStream);
    }
}
 
Example #7
Source File: NERParserCommonTest.java    From grobid-ner with Apache License 2.0 5 votes vote down vote up
/**
 * Checks result extraction on a labelled sequence holding two single-token
 * entities ("Austria" labelled B-UNKNOWN and "Germany" labelled B-LOCATION):
 * both must be returned with their raw name and their offsets in the input.
 */
@Test
public void testresultExtraction_clusteror_simple() throws Exception {
    final String input = "Austria fought the enemies with Germany.";

    // one line per token: features separated by tabs, label in last position
    final String labelledOutput = "Austria\taustria\tA\tAu\tAus\tAust\tAustr\ta\tia\tria\ttria\tstria\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t1\t1\t1\t1\tXxxx\tXx\t0\tB-UNKNOWN\n" +
            "fought\tfought\tf\tfo\tfou\tfoug\tfough\tt\tht\tght\tught\tought\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "the\tthe\tt\tth\tthe\tthe\tthe\te\the\tthe\tthe\tthe\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxx\tx\t0\tO\n" +
            "enemies\tenemies\te\ten\tene\tenem\tenemi\ts\tes\ties\tmies\temies\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "with\twith\tw\twi\twit\twith\twith\th\tth\tith\twith\twith\tNOCAPS\tNODIGIT\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\txxxx\tx\t0\tO\n" +
            "Germany\tgermany\tG\tGe\tGer\tGerm\tGerma\ty\tny\tany\tmany\trmany\tINITCAP\tNODIGIT\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\tXxxx\tXx\t0\tB-LOCATION\n" +
            ".\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tALLCAPS\tNODIGIT\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t.\t.\t0\tO";
    final List<LayoutToken> layoutTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input);

    final List<Entity> entities = target.resultExtraction(GrobidModels.ENTITIES_NER, labelledOutput, layoutTokens);

    assertThat(entities, hasSize(2));

    // first entity: "Austria" at the very start of the input
    final Entity austria = entities.get(0);
    assertThat(austria.getRawName(), is("Austria"));
    assertThat(austria.getOffsetStart(), is(0));
    assertThat(austria.getOffsetEnd(), is(7));

    // second entity: "Germany", just before the final period
    final Entity germany = entities.get(1);
    assertThat(germany.getRawName(), is("Germany"));
    assertThat(germany.getOffsetStart(), is(32));
    assertThat(germany.getOffsetEnd(), is(39));
}
 
Example #8
Source File: NLPLeaderboardFigParser.java    From science-result-extractor with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the parser, passing {@code GrobidModels.FIGURE} to the superclass
 * constructor. The constructor is package-private.
 */
NLPLeaderboardFigParser() {
    super(GrobidModels.FIGURE);
}
 
Example #9
Source File: NEREvaluation.java    From grobid-ner with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the evaluation for the {@code GrobidModels.ENTITIES_NER} model and
 * then calls {@code loadAdditionalProperties()}.
 */
public NEREvaluation() {
    // the returned instance is discarded; presumably called only to make sure
    // the GROBID properties are initialised first - TODO confirm
    GrobidProperties.getInstance();
    model = GrobidModels.ENTITIES_NER;
    loadAdditionalProperties();
}
 
Example #10
Source File: SenseTagger.java    From grobid-ner with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a sense tagger by delegating to the single-argument constructor
 * with {@code GrobidModels.ENTITIES_NERSense}.
 */
public SenseTagger() {
    this(GrobidModels.ENTITIES_NERSense);
}
 
Example #11
Source File: NEREnParser.java    From grobid-ner with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the parser by delegating to the single-argument constructor with
 * {@code GrobidModels.ENTITIES_NER}.
 */
public NEREnParser() {
    this(GrobidModels.ENTITIES_NER);
}
 
Example #12
Source File: NERFrParser.java    From grobid-ner with Apache License 2.0 2 votes vote down vote up
/**
 * Creates the parser by delegating to the single-argument constructor with
 * {@code GrobidModels.ENTITIES_NERFR}.
 */
public NERFrParser() {
    this(GrobidModels.ENTITIES_NERFR);

}