edu.stanford.nlp.ie.AbstractSequenceClassifier Java Examples

The following examples show how to use edu.stanford.nlp.ie.AbstractSequenceClassifier. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: CorenlpPipeline.java From datashare with GNU Affero General Public License v3.0

6 votes

/**
 * Runs only the named-entity classifier (Conditional Random Fields) over the input.
 *
 * @param input    the text to annotate
 * @param hash     the input hash code, handed to the resulting annotations
 * @param language the input language; selects which NER model is fetched
 * @return annotations containing one NER entry per span reported by the classifier
 * @throws InterruptedException declared by this method; presumably raised while the
 *                              model is being obtained — TODO confirm against CoreNlpNerModels
 */
private Annotations processNerClassifier(String input, String hash, Language language) throws InterruptedException {
    Annotations result = new Annotations(hash, getType(), language);

    LOGGER.info("name-finding for " + language.toString());
    // Fetch the classifier wrapper registered for this language.
    final CoreNlpAnnotator<AbstractSequenceClassifier<CoreLabel>> nerModel =
            CoreNlpNerModels.getInstance().get(language);

    // Every triple is <category label, begin offset, end offset>.
    for (Triple<String, Integer, Integer> span : nerModel.annotator.classifyToCharacterOffsets(input)) {
        NamedEntity.Category category = NamedEntity.Category.parse(span.first());
        result.add(NER, span.second().intValue(), span.third().intValue(), category);
    }

    return result;
}

Example #2

Source File: StanfordExtractorTest.java From CLAVIN-NERD with GNU General Public License v2.0

6 votes

/**
 * Checks conversion of Stanford NER output format into
 * {@link com.bericotech.clavin.resolver.ClavinLocationResolver}
 * input format.
 *
 * @throws IOException if the model properties cannot be read
 */
@Test
public void testConvertNERtoCLAVIN() throws IOException {
    Properties mp = new Properties();
    // try-with-resources: the classpath stream is closed even when loading fails
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop")) {
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    String text = "I was born in Springfield and grew up in Boston.";
    // each triple is <entity type, start offset, end offset>
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(text);

    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, text);
    assertEquals("wrong number of entities", 2, locationsForCLAVIN.size());
    assertEquals("wrong text for first entity", "Springfield", locationsForCLAVIN.get(0).getText());
    assertEquals("wrong position for first entity", 14, locationsForCLAVIN.get(0).getPosition());
    assertEquals("wrong text for second entity", "Boston", locationsForCLAVIN.get(1).getText());
    assertEquals("wrong position for second entity", 41, locationsForCLAVIN.get(1).getPosition());
}

Example #3

Source File: NERThreadLocalService.java From aliada-tool with GNU General Public License v3.0

5 votes

/**
 * Loads the CRF classifier stored at {@code classifierFilePath}.
 *
 * @return the loaded classifier, or {@code NULL_OBJECT_CLASSIFIER} when loading
 *         throws; in that case the failure is logged with the file path
 */
protected AbstractSequenceClassifier<CoreLabel> initialValue() {
	AbstractSequenceClassifier<CoreLabel> loaded;
	try {
		loaded = CRFClassifier.getClassifier(classifierFilePath);
	} catch (final Exception loadFailure) {
		// Any load problem degrades to the null-object classifier instead of propagating.
		LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
		loaded = NULL_OBJECT_CLASSIFIER;
	}
	return loaded;
}

Example #4

Source File: NERSingletonService.java From aliada-tool with GNU General Public License v3.0

5 votes

/**
 * Returns the classifier held by this service, loading it from
 * {@code classifierFilePath} on the first call.
 *
 * The whole lookup runs while holding this instance's monitor. When loading
 * throws, the failure is logged and {@code NULL_OBJECT_CLASSIFIER} is stored
 * and returned instead.
 */
@Override
AbstractSequenceClassifier<CoreLabel> classifier() {
	synchronized(this) {
		// Already loaded (or already replaced by the null object): nothing to do.
		if (classifier != null) {
			return classifier;
		}
		try {
			classifier = CRFClassifier.getClassifier(classifierFilePath);
		} catch (final Exception loadFailure) {
			LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
			classifier = NULL_OBJECT_CLASSIFIER;
		}
		return classifier;
	}
}

Example #5

Source File: StanfordNamedEntityExtractor.java From CLIFF with Apache License 2.0

5 votes

/**
 * Builds a CRF recognizer from a model file and a properties resource, both
 * looked up under {@code models/}.
 *
 * @param NERmodel file name of the serialized model, relative to {@code models/}
 * @param NERprop  file name of the properties resource, relative to {@code models/}
 * @return the classifier created from the model and the loaded properties
 * @throws IOException if the properties resource is missing from the classpath
 *                     or cannot be read
 * @throws ClassCastException     declared by the original signature; source not visible here
 * @throws ClassNotFoundException declared by the original signature; source not visible here
 */
private AbstractSequenceClassifier<CoreMap> recognizerForFiles(String NERmodel, String NERprop) throws IOException, ClassCastException, ClassNotFoundException {
    String propResource = "models/" + NERprop;
    Properties mp = new Properties();
    // try-with-resources: the stream is closed whether or not loading succeeds
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream(propResource)) {
        if (mpis == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear message
            // rather than an anonymous NullPointerException inside Properties.load
            throw new IOException("NER properties resource not found on classpath: " + propResource);
        }
        mp.load(mpis);
    }
    return (AbstractSequenceClassifier<CoreMap>) CRFClassifier.getClassifier("models/" + NERmodel, mp);
}

Example #6

Source File: StanfordNamedEntityExtractor.java From CLIFF with Apache License 2.0

5 votes

/**
 * Sets up the per-language recognizers and the lookup tables used during extraction.
 *
 * Registers one recognizer each for German, Spanish and English, then creates the
 * demonym map, the custom substitution map, the location blacklist and the
 * person-to-place substitution map.
 *
 * @param config configuration object; not read by this method
 */
public void initialize(CliffConfig config) throws ClassCastException, IOException, ClassNotFoundException{
    // language key -> recognizer built from its model and properties file
    recognizerByLanguage = new HashMap<>();
    recognizerByLanguage.put(
            GERMAN,
            recognizerForFiles("german.conll.germeval2014.hgc_175m_600.crf.ser.gz", "german-2018.hgc_175m_600.prop"));
    recognizerByLanguage.put(
            SPANISH,
            recognizerForFiles("spanish.ancora.distsim.s512.crf.ser.gz", "spanish.ancora.distsim.s512.prop"));
    recognizerByLanguage.put(
            ENGLISH,
            recognizerForFiles("english.all.3class.caseless.distsim.crf.ser.gz", "english.all.3class.caseless.distsim.prop"));

    demonyms = new WikipediaDemonymMap();
    customSubstitutions = new CustomSubstitutionMap(CUSTOM_SUBSTITUTION_FILE);
    locationBlacklist = new Blacklist(LOCATION_BLACKLIST_FILE);
    personToPlaceSubstitutions = new CustomSubstitutionMap(PERSON_TO_PLACE_FILE, false);
}

Example #7

Source File: NERApp.java From openccg with GNU Lesser General Public License v2.1

5 votes

/**
 * Renders an already-classified sentence as text in the "inlineXML" output style.
 *
 * A fresh {@code PlainTextDocumentReaderAndWriter} is created and initialised with
 * the classifier's flags; the formatting is done by its {@code getAnswers} method
 * with spacing preservation enabled.
 *
 * @param sentence        the tokens of the sentence to render
 * @param readerAndWriter not used; kept only so existing callers keep compiling
 * @param classif         classifier whose {@code flags} configure the writer
 * @return the sentence in inline-XML form
 */
public static String classifyToString(List<CoreMap> sentence, DocumentReaderAndWriter<CoreMap> readerAndWriter, AbstractSequenceClassifier classif) {
  PlainTextDocumentReaderAndWriter.OutputStyle outFormat =
    PlainTextDocumentReaderAndWriter.OutputStyle.fromShortName("inlineXML");

  // Use a dedicated, correctly typed writer instead of overwriting the parameter
  // and casting it back.
  PlainTextDocumentReaderAndWriter<CoreMap> plainTextWriter = new PlainTextDocumentReaderAndWriter<CoreMap>();
  plainTextWriter.init(classif.flags);

  return plainTextWriter.getAnswers(sentence, outFormat, true);
}

Example #8

Source File: NERThreadLocalService.java From aliada-tool with GNU General Public License v3.0

4 votes

/**
 * Returns whatever {@code classifiers.get()} currently yields.
 */
@Override
AbstractSequenceClassifier<CoreLabel> classifier() {
	final AbstractSequenceClassifier<CoreLabel> current = classifiers.get();
	return current;
}

Example #9

Source File: StanfordNamedEntityExtractor.java From CLIFF with Apache License 2.0

4 votes

/**
 * Extracts people, locations and organizations from a plain-text body.
 *
 * @param textToParse             text content to perform extraction on
 * @param manuallyReplaceDemonyms when true, demonyms are replaced in the text before
 *                                recognition; can slow down performance quite a bit
 * @param language                key of the recognizer to use
 * @return all the entities mentioned; empty when the input is null or empty
 */
@Override
public ExtractedEntities extractEntities(String textToParse, boolean manuallyReplaceDemonyms, String language) {
    ExtractedEntities entities = new ExtractedEntities();

    if (textToParse == null || textToParse.isEmpty()) {
        logger.warn("input to extractEntities was null or zero!");
        return entities;
    }

    String text = textToParse;
    if (manuallyReplaceDemonyms) {    // this is a noticeable performance hit
        logger.debug("Replacing all demonyms by hand");
        text = demonyms.replaceAll(textToParse);
    }

    AbstractSequenceClassifier<CoreMap> recognizer = recognizerByLanguage.get(language);

    // each hit is <Entity Type, Start Index, Stop Index>
    List<Triple<String, Integer, Integer>> hits = recognizer.classifyToCharacterOffsets(text);
    if (hits == null) {
        return entities;
    }

    for (Triple<String, Integer, Integer> hit : hits) {
        int position = hit.second();
        int stop = hit.third();
        String entityName = text.substring(position, stop);
        String label = hit.first();

        switch (label) {
        case "PERS":       // spanish
        case "I-PER":      // german
        case "PERSON":     // english
            if (personToPlaceSubstitutions.contains(entityName)) {
                // some "people" are configured to be treated as places
                entities.addLocation( getLocationOccurrence(personToPlaceSubstitutions.getSubstitution(entityName), position) );
                logger.debug("Changed person "+entityName+" to a place");
            } else {
                entities.addPerson( new PersonOccurrence(entityName, position) );
            }
            break;
        case "LUG":
        case "I-LOC":      // german
        case "LOCATION":   // english
            if (locationBlacklist.contains(entityName)) {
                logger.debug("Ignored blacklisted location "+entityName);
            } else {
                entities.addLocation( getLocationOccurrence(entityName, position) );
            }
            break;
        case "ORG":            // spanish
        case "I-ORG":          // german
        case "ORGANIZATION":   // english
            entities.addOrganization( new OrganizationOccurrence(entityName, position) );
            break;
        case "OTROS":   // spanish
        case "MISC":    // if you're using the slower 4class model
            // only MISC hits that are known demonyms are kept, as locations
            if (demonyms.contains(entityName)) {
                logger.debug("Found and adding a MISC demonym "+entityName);
                entities.addLocation( getLocationOccurrence(entityName, position) );
            }
            break;
        default:
            logger.error("Unknown NER type :"+ label);
        }
    }

    return entities;
}

Example #10

Source File: StanfordNamedEntityExtractor.java From CLIFF with Apache License 2.0

4 votes

/**
 * Extracts people, locations and organizations from a list of sentences.
 *
 * Each map must provide a {@code "story_sentences_id"} and a {@code "sentence"}
 * entry. Entity positions are offsets into the individual sentence text (after
 * demonym replacement, when enabled). Entities labelled as locations are stored
 * together with the id of the sentence they came from.
 *
 * @param sentences               the sentences to process; null or empty yields an
 *                                empty result
 * @param manuallyReplaceDemonyms when true, demonyms are replaced in every sentence
 *                                before recognition; this is a noticeable performance hit
 * @param language                key of the recognizer to use
 * @return all the entities mentioned across the sentences
 */
@Override
@SuppressWarnings("rawtypes")
public ExtractedEntities extractEntitiesFromSentences(Map[] sentences, boolean manuallyReplaceDemonyms, String language) {
    ExtractedEntities entities = new ExtractedEntities();

    // The warning below promises null handling, so actually guard against null.
    if (sentences == null || sentences.length == 0) {
        logger.warn("input to extractEntities was null or zero!");
        return entities;
    }

    if (manuallyReplaceDemonyms) {    // this is a noticeable performance hit
        logger.debug("Replacing all demonyms by hand");
    }

    AbstractSequenceClassifier<CoreMap> recognizer = recognizerByLanguage.get(language);

    for (Map s : sentences) {
        String storySentencesId = s.get("story_sentences_id").toString();
        String text = s.get("sentence").toString();
        if (manuallyReplaceDemonyms) {    // this is a noticeable performance hit
            text = demonyms.replaceAll(text);
        }
        // extract entities as <Entity Type, Start Index, Stop Index>
        List<Triple<String, Integer, Integer>> extractedEntities =
            recognizer.classifyToCharacterOffsets(text);
        if (extractedEntities != null) {
            for (Triple<String, Integer, Integer> extractedEntity : extractedEntities) {
                String entityName = text.substring(extractedEntity.second(), extractedEntity.third());
                int position = extractedEntity.second();
                // Accept the same Spanish and German labels as extractEntities; otherwise
                // every entity from those recognizers lands in the "Unknown NER type" branch.
                switch (extractedEntity.first) {
                case "PERS":       // spanish
                case "I-PER":      // german
                case "PERSON":     // english
                    if (personToPlaceSubstitutions.contains(entityName)) {
                        entities.addLocation( getLocationOccurrence(personToPlaceSubstitutions.getSubstitution(entityName), position) );
                        logger.debug("Changed person "+entityName+" to a place");
                    } else {
                        PersonOccurrence person = new PersonOccurrence(entityName, position);
                        entities.addPerson( person );
                    }
                    break;
                case "LUG":        // spanish
                case "I-LOC":      // german
                case "LOCATION":   // english
                    if (!locationBlacklist.contains(entityName)) {
                        LocationOccurrence loc = getLocationOccurrence(entityName, position);
                        // save the sentence id here
                        entities.addLocation( new SentenceLocationOccurrence(loc.getText(), storySentencesId) );
                    } else {
                       logger.debug("Ignored blacklisted location "+entityName);
                    }
                    break;
                case "ORG":            // spanish
                case "I-ORG":          // german
                case "ORGANIZATION":   // english
                    OrganizationOccurrence organization = new OrganizationOccurrence(entityName, position);
                    entities.addOrganization( organization );
                    break;
                case "OTROS":   // spanish
                case "MISC":    // if you're using the slower 4class model
                    if (demonyms.contains(entityName)) {
                        logger.debug("Found and adding a MISC demonym "+entityName);
                        entities.addLocation( getLocationOccurrence(entityName, position) );
                    }
                    break;
                default:
                    logger.error("Unknown NER type :"+ extractedEntity.first);
                }
            }
        }
    }

    return entities;
}

Example #11

Source File: WorkflowDemoNERD.java From CLAVIN-NERD with GNU General Public License v2.0

4 votes

/**
 * Sometimes, you might already be using Stanford NER elsewhere in
 * your application, and you'd like to just pass the output from
 * Stanford NER directly into CLAVIN, without having to re-run the
 * input through Stanford NER just to use CLAVIN. This example
 * shows you how to very easily do exactly that.
 *
 * @throws IOException if the model properties cannot be read (other I/O
 *                     calls made here may raise it as well)
 * @throws ClavinException declared by this method; presumably raised by the
 *                         CLAVIN gazetteer or resolver — TODO confirm
 */
private static void resolveStanfordEntities() throws IOException, ClavinException {

    /*#####################################################################
     *
     * Start with Stanford NER -- no need to get CLAVIN involved for now.
     *
     *###################################################################*/

    // instantiate Stanford NER entity extractor
    Properties mp = new Properties();
    // try-with-resources: the classpath stream is closed even when loading fails
    try (InputStream mpis = WorkflowDemoNERD.class.getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop")) {
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    // Unstructured text file about Somalia to be geoparsed
    File inputFile = new File("src/test/resources/sample-docs/Somalia-doc.txt");

    // Grab the contents of the text file as a String
    String inputString = TextUtils.fileToString(inputFile);

    // extract entities from input text using Stanford NER
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(inputString);

    /*#####################################################################
     *
     * Now, CLAVIN comes into play...
     *
     *###################################################################*/

    // convert Stanford NER output to ClavinLocationResolver input
    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, inputString);

    // instantiate the CLAVIN location resolver
    ClavinLocationResolver clavinLocationResolver = new ClavinLocationResolver(new LuceneGazetteer(new File("./IndexDirectory")));

    // resolve location entities extracted from input text
    List<ResolvedLocation> resolvedLocations = clavinLocationResolver.resolveLocations(locationsForCLAVIN, 1, 1, false);

    // Display the ResolvedLocations found for the location names
    for (ResolvedLocation resolvedLocation : resolvedLocations) {
        System.out.println(resolvedLocation);
    }
}

Example #12

Source File: NERService.java From aliada-tool with GNU General Public License v3.0

votes

/**
 * Returns the sequence classifier this service works with.
 * How the instance is obtained is left to the concrete subclass.
 */
abstract AbstractSequenceClassifier<CoreLabel> classifier();