edu.stanford.nlp.ie.crf.CRFClassifier Java Examples

The following examples show how to use edu.stanford.nlp.ie.crf.CRFClassifier. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StanfordExtractorTest.java    From CLAVIN-NERD with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks conversion of Stanford NER output format into
 * {@link com.bericotech.clavin.resolver.ClavinLocationResolver}
 * input format.
 *
 * @throws IOException if the model properties resource is missing or cannot be read
 */
@Test
public void testConvertNERtoCLAVIN() throws IOException {
    final String propResource = "models/english.all.3class.distsim.prop";
    Properties mp = new Properties();
    // Close the stream as soon as the properties have been read.
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream(propResource)) {
        if (mpis == null) {
            // getResourceAsStream signals "not found" with null; fail with a readable message.
            throw new IOException("classpath resource not found: " + propResource);
        }
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    String text = "I was born in Springfield and grew up in Boston.";
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(text);

    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, text);
    assertEquals("wrong number of entities", 2, locationsForCLAVIN.size());
    assertEquals("wrong text for first entity", "Springfield", locationsForCLAVIN.get(0).getText());
    assertEquals("wrong position for first entity", 14, locationsForCLAVIN.get(0).getPosition());
    assertEquals("wrong text for second entity", "Boston", locationsForCLAVIN.get(1).getText());
    assertEquals("wrong position for second entity", 41, locationsForCLAVIN.get(1).getPosition());
}
 
Example #2
Source File: Chapter4.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Classifies the concatenation of all {@code sentences} with a Stanford CRF
 * model and prints each token whose answer label is not {@code "O"} as
 * {@code word:category}.
 */
private static void usingStanfordNER() {
    // NOTE(review): the backslash separator presumably targets Windows paths — confirm.
    String model = getModelDir() + "\\english.conll.4class.distsim.crf.ser.gz";
    CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(model);

    // Build the text with a StringBuilder rather than repeated String concatenation.
    StringBuilder text = new StringBuilder();
    for (String element : sentences) {
        text.append(element);
    }

    List<List<CoreLabel>> entityList = classifier.classify(text.toString());

    for (List<CoreLabel> internalList : entityList) {
        for (CoreLabel coreLabel : internalList) {
            String word = coreLabel.word();
            String category = coreLabel.get(CoreAnnotations.AnswerAnnotation.class);
            // Tokens labelled "O" are skipped; everything else is reported.
            if (!"O".equals(category)) {
                System.out.println(word + ":" + category);
            }
        }
    }
}
 
Example #3
Source File: Chapter4.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Classifies the concatenation of all {@code sentences} with a Stanford CRF
 * model and prints each token whose answer label is not {@code "O"} as
 * {@code word:category}.
 */
private static void usingStanfordNER() {
    // NOTE(review): the backslash separator presumably targets Windows paths — confirm.
    String model = getModelDir() + "\\english.conll.4class.distsim.crf.ser.gz";
    CRFClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(model);

    // Build the text with a StringBuilder rather than repeated String concatenation.
    StringBuilder text = new StringBuilder();
    for (String element : sentences) {
        text.append(element);
    }

    List<List<CoreLabel>> entityList = classifier.classify(text.toString());

    for (List<CoreLabel> internalList : entityList) {
        for (CoreLabel coreLabel : internalList) {
            String word = coreLabel.word();
            String category = coreLabel.get(CoreAnnotations.AnswerAnnotation.class);
            // Tokens labelled "O" are skipped; everything else is reported.
            if (!"O".equals(category)) {
                System.out.println(word + ":" + category);
            }
        }
    }
}
 
Example #4
Source File: CRFPostprocessor.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
public CRFPostprocessor(Properties props) {
  // Currently, this class only supports one featureFactory.
  props.put("featureFactory", CRFPostprocessorFeatureFactory.class.getName());

  flags = new SeqClassifierFlags(props);
  classifier = new CRFClassifier<CoreLabel>(flags);
}
 
Example #5
Source File: StanfordNamedEntityExtractor.java    From CLIFF with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a CRF classifier from {@code "models/" + NERmodel}, configured with the
 * properties read from the classpath resource {@code "models/" + NERprop}.
 *
 * @param NERmodel model file name, resolved under {@code models/}
 * @param NERprop  properties file name, resolved under {@code models/} on the classpath
 * @return the loaded classifier
 * @throws IOException            if the properties resource is missing or cannot be read
 * @throws ClassCastException     declared by the signature
 * @throws ClassNotFoundException declared by the signature
 */
private AbstractSequenceClassifier<CoreMap> recognizerForFiles(String NERmodel, String NERprop) throws IOException, ClassCastException, ClassNotFoundException {
    final String propResource = "models/" + NERprop;
    Properties mp = new Properties();
    // Close the stream as soon as the properties have been read.
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream(propResource)) {
        if (mpis == null) {
            // getResourceAsStream signals "not found" with null; fail with a readable message.
            throw new IOException("NER properties resource not found on classpath: " + propResource);
        }
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> recognizer = (AbstractSequenceClassifier<CoreMap>) CRFClassifier.getClassifier("models/" + NERmodel, mp);
    return recognizer;
}
 
Example #6
Source File: StanfordAdapter.java    From jieba-solr with Apache License 2.0 5 votes vote down vote up
/**
 * 
 */
private StanfordAdapter(Reader input, String modelDir) {
	Properties props = new Properties();
	props.setProperty("sighanCorporaDict", modelDir);
	// props.setProperty("NormalizationTable", "data/norm.simp.utf8");
	// props.setProperty("normTableEncoding", "UTF-8");
	// below is needed because CTBSegDocumentIteratorFactory accesses it
	props.setProperty("serDictionary", modelDir + "/dict-chris6.ser.gz" + "," +  modelDir + "/dict-chris6.ser.gz");
	props.setProperty("inputEncoding", "UTF-8");
	props.setProperty("sighanPostProcessing", "true");

	segmenter = new CRFClassifier<CoreLabel>(props);
	segmenter.loadClassifierNoExceptions(modelDir + "/ctb.gz", props);
}
 
Example #7
Source File: StanfordNeTagger.java    From OpenEphyra with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Initializes the StanfordNeTagger with a custom model.
 * <p>
 * On success both {@code classifier} and {@code serializedClassifier} are
 * updated; if loading throws, neither is assigned and the exception is
 * discarded.
 *
 * @param customSerializedClassifier path of the custom classifier to load
 * @return {@code true} if the classifier was loaded, {@code false} if loading threw
 */
public static boolean init(String customSerializedClassifier) {
	boolean loaded;
	try {
		classifier = CRFClassifier.getClassifier(customSerializedClassifier);
		serializedClassifier = customSerializedClassifier;
		loaded = true;
	} catch (Exception ignored) {
		// Any failure is reported to the caller only through the return value.
		loaded = false;
	}
	return loaded;
}
 
Example #8
Source File: NERSingletonService.java    From aliada-tool with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the classifier, loading it from {@code classifierFilePath} the first
 * time it is requested. The whole lookup runs while holding this object's
 * monitor. If loading throws, the failure is logged and
 * {@code NULL_OBJECT_CLASSIFIER} is stored and returned instead.
 */
@Override
AbstractSequenceClassifier<CoreLabel> classifier() {
	synchronized(this) {
		// Already initialized by an earlier call: nothing to do.
		if (classifier != null) {
			return classifier;
		}

		try {
			classifier = CRFClassifier.getClassifier(classifierFilePath);
		} catch (final Exception exception) {
			LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
			classifier = NULL_OBJECT_CLASSIFIER;
		}
		return classifier;
	}
}
 
Example #9
Source File: NERThreadLocalService.java    From aliada-tool with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads the classifier from {@code classifierFilePath}. If loading throws, the
 * failure is logged and {@code NULL_OBJECT_CLASSIFIER} is returned instead.
 *
 * @return the loaded classifier, or {@code NULL_OBJECT_CLASSIFIER} on failure
 */
protected AbstractSequenceClassifier<CoreLabel> initialValue() {
	AbstractSequenceClassifier<CoreLabel> result;
	try {
		result = CRFClassifier.getClassifier(classifierFilePath);
	} catch (final Exception exception) {
		LOGGER.error(MessageCatalog._00052_CLASSIFIER_LOAD_FAILURE, classifierFilePath);
		result = NULL_OBJECT_CLASSIFIER;
	}
	return result;
}
 
Example #10
Source File: StanfordAdapter.java    From analyzer-solr with MIT License 5 votes vote down vote up
/**
 * 
 */
private StanfordAdapter(Reader input, String modelDir) {
	Properties props = new Properties();
	props.setProperty("sighanCorporaDict", modelDir);
	// props.setProperty("NormalizationTable", "data/norm.simp.utf8");
	// props.setProperty("normTableEncoding", "UTF-8");
	// below is needed because CTBSegDocumentIteratorFactory accesses it
	props.setProperty("serDictionary", modelDir + "/dict-chris6.ser.gz" + "," +  modelDir + "/dict-chris6.ser.gz");
	props.setProperty("inputEncoding", "UTF-8");
	props.setProperty("sighanPostProcessing", "true");

	segmenter = new CRFClassifier<CoreLabel>(props);
	segmenter.loadClassifierNoExceptions(modelDir + "/ctb.gz", props);
}
 
Example #11
Source File: StanfordChineseNER.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the tag-successor table in {@code expectedSuccessdingTags} and
 * loads the Chinese KBP CRF classifier with a few overridden properties.
 *
 * @param aContext passed through to {@code super.initialize}
 * @throws ResourceInitializationException if reading the properties or loading
 *         the classifier fails with an IOException or ClassNotFoundException
 */
@Override public void initialize(UimaContext aContext) throws ResourceInitializationException {
  super.initialize(aContext);

  // Each row is {full tag, short tag}. For every row the map receives, in this order:
  //   full -> full,  "I-" + short -> "I-" + short,  "B-" + short -> "I-" + short
  final String[][] tagNames = {
      {"GPE", "GPE"},
      {"LOC", "LOC"},
      {"PERSON", "PER"},
      {"ORG", "ORG"},
      {"MISC", "MISC"}
  };
  for (String[] names : tagNames) {
    final String insideTag = "I-" + names[1];
    expectedSuccessdingTags.put(names[0], names[0]);
    expectedSuccessdingTags.put(insideTag, insideTag);
    expectedSuccessdingTags.put("B-" + names[1], insideTag);
  }

  try {
    Properties nerProps = ClassPathUtils.getPropertiesFromClasspath("edu/stanford/nlp/models/ner/chinese.kbp.distsim.prop");
    // The next two are switched off here (original note: "false not for english").
    nerProps.put("ner.useSUTime", "false");
    nerProps.put("ner.applyNumericClassifiers", "false");
    nerProps.put("mergeTags", "false");
    classifier = CRFClassifier.getClassifier("edu/stanford/nlp/models/ner/chinese.kbp.distsim.crf.ser.gz", nerProps);
  } catch (IOException | ClassNotFoundException e) {
    throw new ResourceInitializationException(e);
  }
}
 
Example #12
Source File: CRFPreprocessor.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a CRF segmenter from a command-line style option string and loads
 * the serialized classifier named by its {@code -loadClassifier} flag.
 *
 * @param options whitespace-separated classifier flags
 * @return the loaded classifier, with its tag index loaded
 * @throws IllegalArgumentException if no {@code -loadClassifier} flag is present
 */
public static CRFClassifier<CoreLabel> loadClassifier(String options) throws IllegalArgumentException {
  // Split on runs of whitespace so repeated or leading spaces do not yield empty arguments.
  String[] inputFlags = options.trim().split("\\s+");
  Properties props = StringUtils.argsToProperties(inputFlags);
  SeqClassifierFlags flags = new SeqClassifierFlags(props);
  // Validate the mandatory flag before constructing the classifier.
  if (flags.loadClassifier == null) {
    throw new IllegalArgumentException("missing -loadClassifier flag for CRF preprocessor.");
  }
  CRFClassifier<CoreLabel> crfSegmenter = new CRFClassifier<>(flags);
  crfSegmenter.loadClassifierNoExceptions(flags.loadClassifier, props);
  crfSegmenter.loadTagIndex();
  return crfSegmenter;
}
 
Example #13
Source File: TrainNerModel.java    From InformationExtraction with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Trains a CRF model from the properties file named by
 * {@code IntelConfig.DEPARTMENT_TRAIN_PROPERTY} and serializes it to the path
 * given by that file's {@code serializeTo} property.
 *
 * @param args not used
 * @throws IllegalStateException if the {@code serializeTo} property is not set
 */
public static void main(String[] args) {
    String path = IntelConfig.DEPARTMENT_TRAIN_PROPERTY;
    Properties props = StringUtils.propFileToProperties(path);

    // Check the output location before training so a missing property fails
    // immediately instead of after the model has been trained.
    String modelPath = props.getProperty("serializeTo");
    if (modelPath == null) {
        throw new IllegalStateException("property 'serializeTo' is not set in " + path);
    }

    SeqClassifierFlags flags = new SeqClassifierFlags(props);
    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(flags);
    crf.train();
    crf.serializeClassifier(modelPath);
    System.out.println("Build model to " + modelPath);
}
 
Example #14
Source File: NERTool.java    From Criteria2Query with Apache License 2.0 5 votes vote down vote up
/**
 * Trains a CRF classifier on {@code traindatapath} with a fixed feature
 * configuration and serializes the result to {@code targetpath}.
 *
 * @param traindatapath training file (to train with multiple files, a comma separated list)
 * @param targetpath    location the serialized classifier is written to
 */
public static void train(String traindatapath, String targetpath) {
    // properties: https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/ie/NERFeatureFactory.html
    Properties props = new Properties();
    props.setProperty("trainFile", traindatapath);
    props.setProperty("map", "word=0,answer=1");
    props.setProperty("useClassFeature", "true");
    props.setProperty("useNGrams", "true");
    props.setProperty("noMidNGrams", "true");
    props.setProperty("maxNGramLeng", "6");
    props.setProperty("useDisjunctive", "true");
    props.setProperty("usePrev", "true");
    props.setProperty("useNext", "true");
    props.setProperty("useSequences", "true");
    props.setProperty("usePrevSequences", "true");
    props.setProperty("maxLeft", "1");
    props.setProperty("useTypeSeqs", "true");
    props.setProperty("useTypeSeqs2", "true");
    props.setProperty("useTypeySequences", "true");
    props.setProperty("wordShape", "chris2useLC");
    // props.setProperty("printFeatures", "true");
    // This feature can be turned off in recent versions with the flag -useKnownLCWords false
    // https://nlp.stanford.edu/software/crf-faq.html question 13

    /* Learn the classifier from the training data, then write it out. */
    SeqClassifierFlags flags = new SeqClassifierFlags(props);
    CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(flags);
    crf.train();
    crf.serializeClassifier(targetpath);
}
 
Example #15
Source File: StanfordQuery.java    From Library with MIT License 5 votes vote down vote up
/**
 * Loads the English 3-class CRF classifier from the classpath resource
 * {@code nlp/english.all.3class.distsim.crf.ser.gz}.
 * <p>
 * Any failure is logged and not rethrown; in that case {@code classifier} is
 * not assigned by this constructor.
 */
public StanfordQuery() {
    try {
        LOGGER.debug("Classifier loading started");
        ClassLoader classLoader = LibraryServicesImpl.class.getClassLoader();
        java.net.URL modelUrl = classLoader.getResource("nlp/english.all.3class.distsim.crf.ser.gz");
        if (modelUrl == null) {
            // getResource signals "not found" with null; report it with a readable message.
            throw new IllegalStateException("Classifier model not found on classpath: nlp/english.all.3class.distsim.crf.ser.gz");
        }
        // URL.getFile() keeps percent-encoding (e.g. %20 for a space), which is not a
        // valid file-system path; converting through the URI decodes it.
        File file = new File(modelUrl.toURI());
        this.classifier = CRFClassifier.getClassifier(file);
        LOGGER.debug("Classifier loaded successfully");
    } catch (Exception e) {
        LOGGER.error("Error while loading classifier", e);
    }
}
 
Example #16
Source File: ChinesePreprocessor.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates a Chinese preprocessor; the segmenter is handed unchanged to the
 * superclass constructor.
 *
 * @param crfSegmenter CRF classifier passed to the superclass
 */
public ChinesePreprocessor(CRFClassifier<CoreLabel> crfSegmenter) {
  super(crfSegmenter);
}
 
Example #17
Source File: ChinesePreprocessor.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Combines the labeled document into a single string via
 * {@code ChineseStringUtils.combineSegmentedSentence} and splits that string
 * on runs of whitespace.
 *
 * @param doc          labeled tokens to combine
 * @param crfSegmenter classifier whose {@code flags} are passed to the combiner
 * @return the whitespace-separated pieces of the combined string
 */
@Override
protected String[] getSegmentedText(List<CoreLabel> doc, CRFClassifier<CoreLabel> crfSegmenter) {
  final String combined = ChineseStringUtils.combineSegmentedSentence(doc, crfSegmenter.flags);
  return combined.split("\\s+");
}
 
Example #18
Source File: CRFPreprocessor.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates a preprocessor that keeps a reference to the given segmenter.
 *
 * @param crfSegmenter CRF classifier stored in the {@code crfSegmenter} field
 */
public CRFPreprocessor(CRFClassifier<CoreLabel> crfSegmenter) {
  this.crfSegmenter = crfSegmenter;
}
 
Example #19
Source File: NERecognizer.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
public NERecognizer() {
	serializedClassifier = Globals.localPath+"lib/stanford-ner-2012-11-11/classifiers/english.all.3class.distsim.crf.ser.gz";
	classifier  = CRFClassifier.getClassifierNoExceptions(serializedClassifier);
}
 
Example #20
Source File: WorkflowDemoNERD.java    From CLAVIN-NERD with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Sometimes, you might already be using Stanford NER elsewhere in
 * your application, and you'd like to just pass the output from
 * Stanford NER directly into CLAVIN, without having to re-run the
 * input through Stanford NER just to use CLAVIN. This example
 * shows you how to very easily do exactly that.
 *
 * @throws IOException if the NER properties resource is missing or cannot be read
 * @throws ClavinException declared by the signature
 */
private static void resolveStanfordEntities() throws IOException, ClavinException {

    /*#####################################################################
     *
     * Start with Stanford NER -- no need to get CLAVIN involved for now.
     *
     *###################################################################*/

    // instantiate Stanford NER entity extractor
    final String propResource = "models/english.all.3class.distsim.prop";
    Properties mp = new Properties();
    // Close the stream as soon as the properties have been read.
    try (InputStream mpis = WorkflowDemoNERD.class.getClassLoader().getResourceAsStream(propResource)) {
        if (mpis == null) {
            // getResourceAsStream signals "not found" with null; fail with a readable message.
            throw new IOException("classpath resource not found: " + propResource);
        }
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    // Unstructured text file about Somalia to be geoparsed
    File inputFile = new File("src/test/resources/sample-docs/Somalia-doc.txt");

    // Grab the contents of the text file as a String
    String inputString = TextUtils.fileToString(inputFile);

    // extract entities from input text using Stanford NER
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(inputString);

    /*#####################################################################
     *
     * Now, CLAVIN comes into play...
     *
     *###################################################################*/

    // convert Stanford NER output to ClavinLocationResolver input
    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, inputString);

    // instantiate the CLAVIN location resolver
    ClavinLocationResolver clavinLocationResolver = new ClavinLocationResolver(new LuceneGazetteer(new File("./IndexDirectory")));

    // resolve location entities extracted from input text
    List<ResolvedLocation> resolvedLocations = clavinLocationResolver.resolveLocations(locationsForCLAVIN, 1, 1, false);

    // Display the ResolvedLocations found for the location names
    for (ResolvedLocation resolvedLocation : resolvedLocations) {
        System.out.println(resolvedLocation);
    }
}
 
Example #21
Source File: Chapter6.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 4 votes vote down vote up
/**
 * Demo in two parts. First runs a CoreNLP pipeline with the sentiment
 * annotator over a sample review and prints, per sentence, the sentiment tree
 * details and the predicted sentiment label. Then loads a CRF classifier from
 * a hard-coded path and prints its output for two sample sentences in several
 * formats.
 */
private static void usingStanfordSentimentAnalysis() {
    String review = "An overly sentimental film with a somewhat "
            + "problematic message, but its sweetness and charm "
            + "are occasionally enough to approximate true depth "
            + "and grace. ";

    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, parse, sentiment");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation = new Annotation(review);
    pipeline.annotate(annotation);

    System.out.println("---sentimentText");
    // Indexed by the predicted class returned for each sentence tree.
    String[] sentimentText = {"Very Negative", "Negative", "Neutral",
        "Positive", "Very Positive"};
    for (CoreMap sentence : annotation.get(
            CoreAnnotations.SentencesAnnotation.class)) {
        Tree tree = sentence.get(
                SentimentCoreAnnotations.AnnotatedTree.class);
        System.out.println("---Number of children: " + tree.numChildren());
        // NOTE(review): assumes the tree has at least two children — confirm.
        System.out.println("[" + tree.getChild(0) + "][" + tree.getChild(1) + "]");
        tree.printLocalTree();
        int score = RNNCoreAnnotations.getPredictedClass(tree);
        System.out.println(sentimentText[score]);
    }

    // Classifier (wildcard type instead of a raw type)
    CRFClassifier<?> crf
            = CRFClassifier.getClassifierNoExceptions(
                    "C:/Current Books in Progress/NLP and Java/Models"
                    + "/english.all.3class.distsim.crf.ser.gz");
    String S1 = "Good afternoon Rajat Raina, how are you today?";
    String S2 = "I go to school at Stanford University, which is located in California.";
    System.out.println(crf.classifyToString(S1));
    System.out.println(crf.classifyWithInlineXML(S2));
    System.out.println(crf.classifyToString(S2, "xml", true));

    // Print each element of the classification result on its own line.
    for (Object classified : crf.classify(S2)) {
        System.out.println(classified);
    }
}
 
Example #22
Source File: StanfordExtractor.java    From CLAVIN-NERD with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Builds a {@link StanfordExtractor} by instantiating the 
 * Stanford NER named entity recognizer with a specified 
 * language model.
 * 
 * @param NERmodel                      path to Stanford NER language model
 * @param NERprop						path to property file for Stanford NER language model
 * @throws IOException 					if the property file is missing from the classpath or cannot be read
 * @throws ClassNotFoundException 		Error by contract
 * @throws ClassCastException 			Error by contract
 */
public StanfordExtractor(String NERmodel, String NERprop) throws IOException, ClassCastException, ClassNotFoundException {
	
	final String propResource = "models/" + NERprop;
	Properties mp = new Properties();
	// Close the stream as soon as the properties have been read.
	try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream(propResource)) {
		if (mpis == null) {
			// getResourceAsStream signals "not found" with null; fail with a readable message.
			throw new IOException("NER properties resource not found on classpath: " + propResource);
		}
		mp.load(mpis);
	}
   	
	namedEntityRecognizer = CRFClassifier.getJarClassifier("/models/" + NERmodel, mp);
}
 
Example #23
Source File: CRFPreprocessor.java    From phrasal with GNU General Public License v3.0 votes vote down vote up
/**
 * Produces the text pieces for a classified document; the splitting strategy
 * is left to subclasses.
 *
 * @param doc          labeled tokens of the document
 * @param crfSegmenter CRF classifier made available to the implementation
 * @return array of strings (presumably the segmented tokens — confirm against implementations)
 */
abstract protected String[] getSegmentedText(List<CoreLabel> doc, CRFClassifier<CoreLabel> crfSegmenter);