edu.stanford.nlp.util.CoreMap Java Examples

The following examples show how to use edu.stanford.nlp.util.CoreMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Minimization.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Removes the words in {@code remWords} from the phrase, records them (and the edges obtained
 * from {@code CoreNLPUtils.listOfCoreMapWordsToParentEdges}) as dropped, and empties both
 * working lists.
 *
 * @param remWords   words to remove from the phrase; cleared before returning
 * @param matchWords list of matched words; cleared on entry
 */
public void dropWords(List<CoreMap> remWords, List<CoreMap> matchWords){
    // The match list is emptied before remWords is read.
    matchWords.clear();

    // Record what is about to be removed: first the edges, then the words themselves.
    ObjectArrayList<SemanticGraphEdge> parentEdges =
            CoreNLPUtils.listOfCoreMapWordsToParentEdges(this.sg, remWords);
    this.phrase.addDroppedEdges(parentEdges);
    this.phrase.addDroppedWords(CoreNLPUtils.getWordListFromCoreMapList(remWords));

    // Take the words out of the phrase, then reset the removal list for reuse.
    this.phrase.removeCoreLabelWordsFromList(remWords);
    remWords.clear();
}
 
Example #2
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Annotates {@code text} and returns the parse tree of its first sentence.
 *
 * @param text the raw text to annotate
 * @return the {@code TreeAnnotation} value of the first sentence, or {@code null} when the
 *         annotated document contains no sentences
 */
public Tree getParseTree (String text) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline_lemma.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // Only the first sentence is ever used, so say so explicitly instead of looping and
    // returning on the first iteration. A missing sentence list is treated like an empty one.
    if (sentences == null || sentences.isEmpty()) {
        return null;
    }
    return sentences.get(0).get(TreeAnnotation.class);
}
 
Example #3
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Runs a tokenize/ssplit/pos pipeline over {@code theSentence}, prints every token as
 * {@code word/TAG} (one output line per sentence), and then dumps the annotated document in
 * XML and pretty-printed form to standard output.
 */
private static void usingStanfordPOSTagger() {
    Properties props = new Properties();
    // setProperty keeps every value a String. A non-String value stored through put() is
    // reported as null by Properties.getProperty(), which silently discarded "pos.maxlen".
    props.setProperty("annotators", "tokenize, ssplit, pos");
    props.setProperty("pos.model", "C:\\Current Books in Progress\\NLP and Java\\Models\\english-caseless-left3words-distsim.tagger");
    props.setProperty("pos.maxlen", "10");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(theSentence);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.print(word + "/" + pos + " ");
        }
        System.out.println();
    }

    // These calls print the whole document, so they run once after the per-sentence output
    // rather than once per sentence.
    try {
        pipeline.xmlPrint(document, System.out);
        pipeline.prettyPrint(document, System.out);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
 
Example #4
Source File: KBPSemgrexExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the first relation type whose rules match the input sentence, with confidence 1.0.
 * A relation type is only tried when rules exist for it, its entity type equals the input's
 * subject type, and its valid named-entity labels contain the input's object type.
 *
 * @param input the sentence plus subject/object information to classify
 * @return a pair of the matching relation's canonical name and 1.0, or
 *         {@code (NO_RELATION, 1.0)} when no relation type matches
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
  for (RelationType rel : RelationType.values()) {
    // Skip relation types that have no rules or do not fit the argument types.
    if (!rules.containsKey(rel)
        || rel.entityType != input.subjectType
        || !rel.validNamedEntityLabels.contains(input.objectType)) {
      continue;
    }

    Collection<SemgrexPattern> patterns = rules.get(rel);
    CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);

    // The enhanced++ graph is tried first; the alternative graph is consulted only if it fails.
    if (matches(sentence, patterns, input,
        sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class))) {
      return Pair.makePair(rel.canonicalName, 1.0);
    }
    if (matches(sentence, patterns, input,
        sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class))) {
      return Pair.makePair(rel.canonicalName, 1.0);
    }
  }

  return Pair.makePair(NO_RELATION, 1.0);
}
 
Example #5
Source File: StanfordRNNDParser.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the CAS to a CoreNLP annotation, runs the dependency parser on every sentence,
 * stores the resulting graph as the sentence's enhanced dependencies, and writes the
 * dependencies back to the CAS. Each graph and its sorted edges are also printed.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the signature
 */
@Override public void process(JCas jCas) throws AnalysisEngineProcessException {
  mappingProvider.configure(jCas.getCas());
  Annotation annotations = new DKPro2CoreNlp().convert(jCas, new Annotation());

  for (CoreMap sentence : annotations.get(CoreAnnotations.SentencesAnnotation.class)) {
    GrammaticalStructure structure = parser.predict(sentence);
    SemanticGraph graph = SemanticGraphFactory.makeFromTree(
        structure, SemanticGraphFactory.Mode.CCPROCESSED, GrammaticalStructure.Extras.MAXIMAL, null);
    graph.prettyPrint();

    // From here on the sentence carries the graph returned by the conversion helper.
    graph = semanticGraphUniversalEnglishToEnglish(graph);
    sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, graph);

    for (SemanticGraphEdge edge : graph.edgeListSorted()) {
      System.out.println(edge);
    }
  }

  convertDependencies(jCas, annotations, true);
}
 
Example #6
Source File: StanfordExtractorTest.java    From CLAVIN-NERD with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks conversion of Stanford NER output format into
 * {@link com.bericotech.clavin.resolver.ClavinLocationResolver}
 * input format.
 *
 * @throws IOException if the classifier properties cannot be read
 */
@Test
public void testConvertNERtoCLAVIN() throws IOException {
    Properties mp = new Properties();
    // try-with-resources closes the classpath stream once the properties are loaded.
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop")) {
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    String text = "I was born in Springfield and grew up in Boston.";
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(text);

    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, text);
    assertEquals("wrong number of entities", 2, locationsForCLAVIN.size());
    assertEquals("wrong text for first entity", "Springfield", locationsForCLAVIN.get(0).getText());
    assertEquals("wrong position for first entity", 14, locationsForCLAVIN.get(0).getPosition());
    assertEquals("wrong text for second entity", "Boston", locationsForCLAVIN.get(1).getText());
    assertEquals("wrong position for second entity", 41, locationsForCLAVIN.get(1).getPosition());
}
 
Example #7
Source File: NumberOfToken.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/***
 * Builds a list of two-word phrases from the question q: one entry for every basic
 * dependency whose relation is "compound" or "amod", formed from the dependent followed
 * by the governor.
 * @param q  a question
 * @return list of noun phrases
 */
private ArrayList<String> getNounPhrases(String q) {
    Annotation annotated = new Annotation(q);
    PIPELINE.annotate(annotated);

    ArrayList<String> phrases = new ArrayList<String>();
    for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
        SemanticGraph graph = sentence.get(BasicDependenciesAnnotation.class);
        for (TypedDependency dependency : graph.typedDependencies()) {
            String relation = dependency.reln().toString();
            boolean wanted = relation.equals("compound") || relation.equals("amod");
            if (!wanted) {
                continue;
            }
            String dep = dependency.dep().toString();
            String gov = dependency.gov().toString();
            // Each node string is cut at its last "/" (presumably a trailing tag - confirm).
            String depWord = dep.substring(0, dep.lastIndexOf("/"));
            String govWord = gov.substring(0, gov.lastIndexOf("/"));
            phrases.add(depWord + " " + govWord);
        }
    }
    return phrases;
}
 
Example #8
Source File: SentimentAnalyzer.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the pipeline over {@code text} and fills a {@code SentimentResult} from its sentences.
 * The same result and classification objects are overwritten for every sentence, so the
 * returned values are those of the last sentence processed.
 *
 * @param text the text to analyse; may be null or empty
 * @return the populated result, or a freshly constructed one when {@code text} is null or empty
 */
public SentimentResult getSentimentResult(String text) {
	SentimentClassification classification = new SentimentClassification();
	SentimentResult sentimentResult = new SentimentResult();
	if (text == null || text.length() == 0) {
		return sentimentResult;
	}

	for (CoreMap sentence : pipeline.process(text).get(CoreAnnotations.SentencesAnnotation.class)) {
		Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
		SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(sentimentTree);

		// Prediction entries 0..4 are scaled by 100 and rounded to the nearest whole number.
		classification.setVeryNegative((double) Math.round(predictions.get(0) * 100d));
		classification.setNegative((double) Math.round(predictions.get(1) * 100d));
		classification.setNeutral((double) Math.round(predictions.get(2) * 100d));
		classification.setPositive((double) Math.round(predictions.get(3) * 100d));
		classification.setVeryPositive((double) Math.round(predictions.get(4) * 100d));

		String sentimentType = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
		sentimentResult.setSentimentType(sentimentType);
		sentimentResult.setSentimentClass(classification);
		sentimentResult.setSentimentScore(RNNCoreAnnotations.getPredictedClass(sentimentTree));
	}
	return sentimentResult;
}
 
Example #9
Source File: IntelKBPSemgrexExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Walks all relation types and returns the first one whose Semgrex rules match the input
 * sentence. Relation types without rules, or whose entity type / valid named-entity labels
 * do not fit the input's subject and object types, are skipped.
 *
 * @param input the sentence plus subject/object information to classify
 * @return the matching relation's canonical name paired with 1.0, or
 *         {@code (NO_RELATION, 1.0)} when nothing matches
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
    for (RelationType rel : RelationType.values()) {
        boolean applicable = rules.containsKey(rel)
                && rel.entityType == input.subjectType
                && rel.validNamedEntityLabels.contains(input.objectType);
        if (!applicable) {
            continue;
        }

        Collection<SemgrexPattern> patterns = rules.get(rel);
        CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);

        // Short-circuit: the alternative graph is only looked up when the enhanced++ one fails.
        boolean matched =
                matches(sentence, patterns, input,
                        sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class))
                || matches(sentence, patterns, input,
                        sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class));
        if (matched) {
            return Pair.makePair(rel.canonicalName, 1.0);
        }
    }

    return Pair.makePair(NO_RELATION, 1.0);
}
 
Example #10
Source File: RegexNerTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code doc} and returns the named-entity tag of every token, in document order.
 *
 * @param doc the raw text to annotate
 * @return one entry per token holding that token's {@code NamedEntityTagAnnotation} value
 */
public static List<String> extractNER(String doc){
    Annotation document = new Annotation(doc);
    pipeline.annotate(document);

    List<String> result = new ArrayList<String>();
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            // Only the NER label is collected; the unused text and POS lookups were removed.
            result.add(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
        }
    }
    return result;
}
 
Example #11
Source File: SentimentAnalyzer.java    From hazelcast-jet-demos with Apache License 2.0 6 votes vote down vote up
/**
 * Averages, over all sentences not predicted as neutral (class 2), the prediction entry
 * selected by the overall sentiment: index 0 (very negative) below -0.5, index 1 (negative)
 * below 0.0, index 3 (positive) below 0.5, otherwise index 4 (very positive).
 *
 * @param sentences        annotated sentences carrying a sentiment tree
 * @param overallSentiment overall sentiment used to pick the prediction entry
 * @return the mean of the selected entry; NaN when no sentence was counted (0.0 / 0)
 */
private double getScore(List<CoreMap> sentences, double overallSentiment) {
    final int matrixIndex;
    if (overallSentiment < -0.5) {
        matrixIndex = 0; // very negative
    } else if (overallSentiment < 0.0) {
        matrixIndex = 1; // negative
    } else if (overallSentiment < 0.5) {
        matrixIndex = 3; // positive
    } else {
        matrixIndex = 4; // very positive
    }

    double total = 0;
    int counted = 0;
    for (CoreMap sentence : sentences) {
        Tree sentiments = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        if (RNNCoreAnnotations.getPredictedClass(sentiments) != 2) { // 2 == neutral, not counted
            total += RNNCoreAnnotations.getPredictions(sentiments).get(matrixIndex);
            counted++;
        }
    }
    return total / counted;
}
 
Example #12
Source File: ComparisonUtils.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Collects the tokens of the question whose part-of-speech tag starts with the given tag,
 * e.g. JJR for comparatives and JJS for superlatives.
 * @param question String to retrieve words from.
 * @param tag tag prefix to select, e.g. JJR or JJS.
 * @return List of the retrieved words (each token's string form cut at its last "-"),
 *         or null when either argument is null.
 */
private ArrayList<String> getWords(String question, String tag) {
    if (question == null || tag == null) {
        return null;
    }

    Annotation annotated = new Annotation(question);
    PIPELINE.annotate(annotated);

    ArrayList<String> selected = new ArrayList<String>();
    for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            if (!token.tag().startsWith(tag)) {
                continue;
            }
            String rendered = token.toString();
            selected.add(rendered.substring(0, rendered.lastIndexOf("-")));
        }
    }
    return selected;
}
 
Example #13
Source File: CorefExample.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a coreference pipeline over a fixed three-sentence text and prints the coreference
 * chains of the document followed by the mentions of each sentence.
 *
 * @param args unused
 * @throws Exception declared by the signature
 */
public static void main(String[] args) throws Exception {
	Properties props = new Properties();
	props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

	String text = "Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.";
	Annotation document = new Annotation(text);
	pipeline.annotate(document);

	// Document-level output: one line per coreference chain.
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}

	// Sentence-level output: a header per sentence, then one line per mention.
	for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example #14
Source File: KBPModel.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code doc} and collects the KBP triples whose trimmed relation gloss is
 * "per:title", "per:employee_of" or "org:top_members/employees".
 *
 * @param doc the raw text to annotate
 * @return a map from each selected triple to the string form of the sentence it came from
 */
public static HashMap<RelationTriple, String> extract(String doc) {
    Annotation ann = new Annotation(doc);
    pipeline.annotate(ann);

    HashMap<RelationTriple, String> relations = new HashMap<RelationTriple, String>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (RelationTriple triple : sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
            String gloss = triple.relationGloss().trim();
            boolean wanted = gloss.equals("per:title")
                    || gloss.equals("per:employee_of")
                    || gloss.equals("org:top_members/employees");
            if (wanted) {
                relations.put(triple, sentence.toString());
            }
        }
    }
    return relations;
}
 
Example #15
Source File: StanfordCoreNLPTest.java    From java_in_examples with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a short fixed text and prints, for every sentence, its sentiment class followed
 * by a tab and the sentence itself.
 *
 * @param s unused
 */
public static void main(String[] s) {
    Properties props = new Properties();
    // SentimentClass is only set by the "sentiment" annotator; without it every sentence
    // printed "null". The annotator works on binarized parse trees, so request them explicitly.
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
    props.setProperty("parse.binaryTrees", "true");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" +
            "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" +
            "\"How do you know I am mad?\" said Alice.\n" +
            "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        System.out.println(sentiment + "\t" + sentence);
    }
}
 
Example #16
Source File: RelationExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code sentence}, runs the relation annotator {@code r} on it, and collects every
 * "Work_For" relation mention as a person-to-organization entry. Each matching relation is
 * also printed to standard output.
 *
 * @param sentence the raw text to annotate
 * @return map from the value of the relation's "PEOPLE" argument to the value of its
 *         "ORGANIZATION" argument; a missing argument is recorded as the empty string, and
 *         a later relation with the same person replaces the earlier entry
 */
public static HashMap<String, String> extract(String sentence) {
    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);
    r.annotate(doc);

    HashMap<String, String> map = new HashMap<String, String>();
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
        List<RelationMention> mentions = s.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
        for (RelationMention mention : mentions) {
            if (!mention.getType().equals("Work_For")) {
                continue;
            }
            System.out.println(mention);

            String organization = "";
            String people = "";
            for (EntityMention argument : mention.getEntityMentionArgs()) {
                if (argument.getType().equals("ORGANIZATION")) {
                    organization = argument.getValue();
                }
                if (argument.getType().equals("PEOPLE")) {
                    people = argument.getValue();
                }
            }
            map.put(people, organization);
        }
    }
    return map;
}
 
Example #17
Source File: Minimization.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Given a phrase, if it contains NERs, make a dictionary minimization around them, then run
 * the safe named-entity minimization.
 *
 * @param remWords   reusable list collecting the words to be removed
 * @param matchWords reusable list of matched words; reassigned to each match's nodes
 **/
public void namedEntityDictionaryMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    // Pattern of interest: (.* DT+ [RB|JJ]* NER+ .*)
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_JJ_NER);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());

    while (this.tMatcher.find()){
        matchWords = tMatcher.groupNodes();

        for (CoreMap node: matchWords){
            CoreLabel label = new CoreLabel(node);
            if (label.lemma() == null) {
                label.setLemma(label.word());
            }

            // Adjectives and adverbs that are not part of a named entity become removal candidates.
            // The NER check stays behind the tag check, so it only runs for adjectives/adverbs.
            boolean adjOrAdverb = CoreNLPUtils.isAdj(label.tag()) || CoreNLPUtils.isAdverb(label.tag());
            if (adjOrAdverb && label.ner().equals(NE_TYPE.NO_NER)){
                remWords.add(node);
            }
        }

        // Hand the match and the candidates to the dictionary-based drop.
        this.dropWordsNotFoundInDict(matchWords, remWords);
    }

    // Finish with the safe minimization
    this.namedEntitySafeMinimization(remWords, matchWords);
}
 
Example #18
Source File: InteractiveDriver.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Interactive console loop: every line read at the "sentence> " prompt is annotated with a
 * KBP pipeline, and each extracted triple whose relation gloss is one of "per:title",
 * "per:employee_of" or "org:top_members/employees" is printed to standard error.
 *
 * @param args command-line arguments, converted to pipeline properties
 * @throws IOException declared by the signature
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Set<String> interested =
            Stream.of("per:title", "per:employee_of", "org:top_members/employees").collect(Collectors.toSet());

    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).stream()
                    .filter(r -> interested.contains(r.relationGloss()))
                    .forEach(r -> System.err.println(r));
        }
    });
}
 
Example #19
Source File: CorefTool.java    From Criteria2Query with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a coreference pipeline over a fixed eligibility-criteria text and prints the
 * document's coreference chains followed by the mentions of each sentence.
 */
public void extractCoref() {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    String s="Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
    Annotation document = new Annotation(s);
    pipeline.annotate(document);

    // Document-level output: one line per coreference chain.
    System.out.println("---");
    System.out.println("coref chains");
    for (CorefChain chain : document.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
        System.out.println("\t" + chain);
    }

    // Sentence-level output: a header per sentence, then one line per mention.
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        System.out.println("---");
        System.out.println("mentions");
        for (Mention mention : sentence.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
            System.out.println("\t" + mention);
        }
    }
}
 
Example #20
Source File: CoreNLPCache.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Load serialized CoreNLP annotations from a file into {@code annotationMap}, keyed by each
 * sentence's line number minus one.
 *
 * @param filename file holding the serialized {@code Annotation}
 * @return the largest line number found plus one (1 when there are no sentences).
 *         NOTE(review): with 1-indexed line numbers the highest map key is the largest line
 *         number minus one, so this is one more than the key range - confirm intended.
 * @throws RuntimeException if the annotation has no sentence list
 */
public static int loadSerialized(String filename) {
  Annotation annotation = IOTools.deserialize(filename, Annotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null) {
    throw new RuntimeException("Unusable annotation (no sentences) in " + filename);
  }

  annotationMap = new HashMap<Integer,CoreMap>(sentences.size());
  int highestLineId = 0;
  for (CoreMap sentence : sentences) {
    // Line numbers are 1-indexed; the map is keyed from 0.
    int lineId = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
    highestLineId = Math.max(highestLineId, lineId);
    annotationMap.put(lineId - 1, sentence);
  }
  return highestLineId + 1;
}
 
Example #21
Source File: QueryAnswerTypeAnalyzer.java    From NLIWOD with GNU Affero General Public License v3.0 5 votes vote down vote up
/***
 * Returns all words whose part-of-speech tag starts with the given tag.
 * NN for all nouns, VB for all verbs, JJ for all adjectives.
 * @param question annotated sentences of the question
 * @param tag NN for all nouns, VB for all verbs, JJ for all adjectives.
 * @return list of words with the given tag (each token's string form cut at its last "-").
 */
private ArrayList<String> getWords(List<CoreMap> question, String tag) {
    ArrayList<String> selected = new ArrayList<String>();
    for (CoreMap sentence : question) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            boolean tagMatches = token.tag().startsWith(tag);
            if (tagMatches) {
                String rendered = token.toString();
                int cut = rendered.lastIndexOf("-");
                selected.add(rendered.substring(0, cut));
            }
        }
    }
    return selected;
}
 
Example #22
Source File: Phrase.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Remove a set of words, given as core maps, from the list of indexed words. Each core map
 * is wrapped in a CoreLabel and then an IndexedWord before being passed on for removal.
 *
 * @param cmWords the words to remove
 **/
public void removeCoreLabelWordsFromList(List<CoreMap> cmWords){
    ObjectArrayList<IndexedWord> toRemove = new ObjectArrayList<>();
    for (CoreMap word: cmWords){
        CoreLabel label = new CoreLabel(word);
        toRemove.add(new IndexedWord(label));
    }
    this.removeWordsFromList(toRemove);
}
 
Example #23
Source File: ObjSafeMinimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Minimize only the objects that are considered to have "safe patterns": runs the noun-phrase
 * and then the named-entity safe minimization on the object phrase.
 * @param object the objects phrase
 * @param sg the semantic graph of the whole sentence
 */
public static void minimizeObject(AnnotatedPhrase object, SemanticGraph sg){
    // Scratch lists shared by both passes: words to remove, and words matched by the regex.
    List<CoreMap> remWords = new ArrayList<>();
    List<CoreMap> matchWords = new ArrayList<>();

    Minimization minimizer = new Minimization(object, sg, new ObjectOpenHashSet<String>());

    // Noun phrases first, named entities second.
    minimizer.nounPhraseSafeMinimization(remWords, matchWords);
    minimizer.namedEntitySafeMinimization(remWords, matchWords);
}
 
Example #24
Source File: Minimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Given a list of words as core maps, check if they are contained in the dictionary, either
 * as their lemma string or as their word string.
 * @param cmWords the words to look up
 * @return true if the lemma string or, failing that, the word string is in the dictionary
 */
public boolean isCoreMapListInDictionary(List<CoreMap> cmWords){
    // The word-string lookup only happens when the lemma-string lookup misses.
    return this.mwe.contains(CoreNLPUtils.listOfCoreMapWordsToLemmaString(cmWords))
            || this.mwe.contains(CoreNLPUtils.listOfCoreMapWordsToWordString(cmWords));
}
 
Example #25
Source File: DigiCompMorphAnnotator.java    From tint with GNU General Public License v3.0 5 votes vote down vote up
/**
 * For every token, selects the space-separated morphological analyses that start with the
 * token's lemma followed by "+" or "~" and stores them as the token's
 * {@code MorphoCompAnnotation}. Does nothing when the annotation has no sentences.
 *
 * With several analyses the (possibly empty) selection is always stored; with a single
 * analysis the annotation is only set when that analysis matches.
 *
 * @param annotation the document annotation to enrich in place
 */
@Override
public void annotate(Annotation annotation) {
    if (!annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
        return;
    }

    for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            String[] analyses = token.get(DigiMorphAnnotations.MorphoAnnotation.class).split(" ");
            String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
            String plusPrefix = lemma + "+";
            String tildePrefix = lemma + "~";

            if (analyses.length > 1) {
                List<String> comps = new ArrayList<>();
                for (String analysis : analyses) {
                    if (analysis.startsWith(plusPrefix) || analysis.startsWith(tildePrefix)) {
                        comps.add(analysis);
                    }
                }
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class, comps);
            } else if (analyses[0].startsWith(plusPrefix) || analyses[0].startsWith(tildePrefix)) {
                token.set(DigiMorphAnnotations.MorphoCompAnnotation.class,
                        new ArrayList<String>(Arrays.asList(analyses[0])));
            }
        }
    }
}
 
Example #26
Source File: Minimization.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/** Given a phrase, if it contains a verb phrase, make a verb phrase safe minimization **/
/**
 * Given a phrase, if it contains a verb phrase, make a verb phrase safe minimization: for
 * every RB+ VB+ match, adverbs that are neither part of a named entity nor negation words are
 * collected and dropped.
 *
 * @param remWords   reusable list collecting the words to be removed
 * @param matchWords reusable list of matched words; reassigned to each match's nodes
 **/
public void verbPhraseSafeMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    // If the relation starts with a RB+ VB+, drop RB+
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_VB);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());

    while (this.tMatcher.find()){
        matchWords = tMatcher.groupNodes();

        for (CoreMap node: matchWords){
            CoreLabel label = new CoreLabel(node);
            if (label.lemma() == null) {
                label.setLemma(label.word());
            }

            // All three checks are evaluated for every word, in this order.
            final boolean adverb = CoreNLPUtils.isAdverb(label.tag());
            final boolean notNamedEntity = label.ner().equals(NE_TYPE.NO_NER);
            final boolean negation = Polarity.NEG_WORDS.contains(label.lemma().toLowerCase());

            if (adverb && notNamedEntity && !negation){
                remWords.add(node);
            }
        }
        this.dropWords(remWords, matchWords);
    }
}
 
Example #27
Source File: Extract.java    From phrases with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code text} with a freshly built tokenize/ssplit/pos/parse pipeline and gathers
 * the patterns extracted from each sentence.
 *
 * @param text the raw text to process
 * @return the patterns of all sentences, in sentence order
 */
public List<Pattern> run(String text) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    List<Pattern> patterns = new ArrayList<Pattern>();
    for (CoreMap sentence : pipeline.process(text).get(CoreAnnotations.SentencesAnnotation.class)) {
        patterns.addAll(ExtractSentencePatterns(sentence));
    }
    return patterns;
}
 
Example #28
Source File: JsonPipeline.java    From tac2015-event-detection with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Runs the corenlp pipeline with all options, and returns all results as a JSON object.
 * The output has a "sentences" list with one entry per sentence and, when "dcoref" is among
 * the annotators, an "entities" entry. The document, character and token counters are
 * updated as a side effect.
 */
	@SuppressWarnings({ "rawtypes", "unchecked" })
	JsonNode processTextDocument(String doctext) {
		// The start time is recorded on the first call only.
		if (startMilli==-1)  startMilli = System.currentTimeMillis();
		numDocs++;
		numChars += doctext.length();

		Annotation document = new Annotation(doctext);
		pipeline.annotate(document);

		List<Map> sentenceObjects = Lists.newArrayList();
		for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
			Map<String,Object> info = Maps.newHashMap();
			addTokenBasics(info, sentence);
			// The "tokens" entry is read back here, right after addTokenBasics.
			List tokens = (List) info.get("tokens");
			numTokens += tokens.size();
			for (String annotator : annotators()) {
				addAnnoToSentenceObject(info, sentence, annotator);
			}
			sentenceObjects.add(info);
		}

		// The raw document text is not included in the output.
		ImmutableMap.Builder builder = new ImmutableMap.Builder();
		builder.put("sentences", sentenceObjects);
		if (Lists.newArrayList(annotators()).contains("dcoref")) {
			List corefInfo = getCorefInfo(document);
			builder.put("entities", corefInfo);
		}

		Map outDoc = builder.build();
		return JsonUtil.toJson(outDoc);
	}
 
Example #29
Source File: StanfordNamedEntityExtractor.java    From CLIFF with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a CRF named-entity recognizer from a model and a properties file that both live
 * under {@code models/}.
 *
 * @param NERmodel name of the serialized model, relative to {@code models/}
 * @param NERprop  name of the properties resource on the classpath, relative to {@code models/}
 * @return the loaded classifier
 * @throws IOException if the properties resource is missing or cannot be read
 * @throws ClassCastException declared by the signature
 * @throws ClassNotFoundException declared by the signature
 */
private AbstractSequenceClassifier<CoreMap> recognizerForFiles(String NERmodel, String NERprop) throws IOException, ClassCastException, ClassNotFoundException {
    Properties mp = new Properties();
    // try-with-resources closes the classpath stream, which was previously left open.
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream("models/" + NERprop)) {
        if (mpis == null) {
            // getResourceAsStream signals a missing resource with null; report it clearly
            // instead of letting Properties.load fail with a bare NullPointerException.
            throw new IOException("NER properties resource not found: models/" + NERprop);
        }
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> recognizer = (AbstractSequenceClassifier<CoreMap>) CRFClassifier.getClassifier("models/" + NERmodel, mp);
    return recognizer;
}
 
Example #30
Source File: SerializedDependencyToCoNLL.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Loads serialized annotations named by the "annotations" option and prints the dependencies
 * of each cached sentence. A missing sentence produces an empty output line plus a note on
 * standard error; the first exception aborts the run after being reported.
 *
 * @param args command-line options ("annotations", "changepreps")
 */
public static void main(String[] args) {
    Properties options = StringUtils.argsToProperties(args, optionArgDefs());
    String annotations = PropertiesUtils.get(options, "annotations", null, String.class);
    boolean changepreps = PropertiesUtils.getBool(options, "changepreps", false);

    int sentenceCount = CoreNLPCache.loadSerialized(annotations);

    for (int i = 0; i < sentenceCount; i++) {
        try {
            CoreMap sentence = CoreNLPCache.get(i);
            if (sentence != null) {
                printDependencies(sentence, changepreps);
            } else {
                // An empty line is still written to standard output for a missing sentence.
                System.out.println();
                System.err.println("Empty sentence #" + i);
            }
        } catch (Exception e) {
            System.err.println("SourceSentence #" + i);
            e.printStackTrace();
            return;
        }
    }
}