edu.stanford.nlp.util.CoreMap Java Examples

The following examples show how to use edu.stanford.nlp.util.CoreMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CorefExample.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a fixed three-sentence text with a coreference pipeline and prints
 * every coreference chain, followed by the coref mentions of each sentence.
 *
 * @param args unused
 * @throws Exception declared for callers; nothing is thrown explicitly here
 */
public static void main(String[] args) throws Exception {
	Annotation doc = new Annotation(
			"Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");

	Properties config = new Properties();
	config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
	StanfordCoreNLP nlp = new StanfordCoreNLP(config);
	nlp.annotate(doc);

	// Document-level output: one line per coreference chain.
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain chain : doc.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}

	// Sentence-level output: one section per sentence, one line per mention.
	for (CoreMap sent : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sent.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example #2
Source File: SentimentAnalyzer.java    From blog-codes with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared pipeline over {@code text} and packages the sentiment of its sentences.
 * <p>
 * Every sentence overwrites the values written by the previous one, so the returned
 * result reflects the last sentence of the text. For {@code null} or empty input an
 * untouched {@link SentimentResult} is returned.
 *
 * @param text the text to analyze; may be {@code null}
 * @return the sentiment type, per-class scores (prediction * 100, rounded) and predicted class
 */
public SentimentResult getSentimentResult(String text) {
	SentimentClassification scores = new SentimentClassification();
	SentimentResult result = new SentimentResult();
	if (text == null || text.length() == 0) {
		return result;
	}

	Annotation annotated = pipeline.process(text);
	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
		SimpleMatrix predictions = RNNCoreAnnotations.getPredictions(sentimentTree);

		// Prediction entries 0..4 are mapped to very negative .. very positive.
		scores.setVeryNegative((double) Math.round(predictions.get(0) * 100d));
		scores.setNegative((double) Math.round(predictions.get(1) * 100d));
		scores.setNeutral((double) Math.round(predictions.get(2) * 100d));
		scores.setPositive((double) Math.round(predictions.get(3) * 100d));
		scores.setVeryPositive((double) Math.round(predictions.get(4) * 100d));

		result.setSentimentType(sentence.get(SentimentCoreAnnotations.SentimentClass.class));
		result.setSentimentClass(scores);
		result.setSentimentScore(RNNCoreAnnotations.getPredictedClass(sentimentTree));
	}
	return result;
}
 
Example #3
Source File: NumberOfToken.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Collects two-word phrases from the question {@code q}.
 * <p>
 * For every basic dependency whose relation is {@code compound} or {@code amod},
 * the dependent and the governor are joined with a single space (dependent first).
 * Each side is the string form of the node cut at its last {@code '/'}
 * (presumably a "word/TAG" rendering -- confirm against the pipeline output).
 *
 * @param q  a question
 * @return list of "dependent governor" phrases, in dependency order
 */
private ArrayList<String> getNounPhrases(String q) {
	Annotation annotated = new Annotation(q);
	PIPELINE.annotate(annotated);

	ArrayList<String> phrases = new ArrayList<String>();
	for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
		SemanticGraph graph = sentence.get(BasicDependenciesAnnotation.class);
		for (TypedDependency edge : graph.typedDependencies()) {
			String relation = edge.reln().toString();
			if (!relation.equals("compound") && !relation.equals("amod")) {
				continue;
			}
			String dependent = edge.dep().toString();
			String governor = edge.gov().toString();
			String dependentWord = dependent.substring(0, dependent.lastIndexOf("/"));
			String governorWord = governor.substring(0, governor.lastIndexOf("/"));
			phrases.add(dependentWord + " " + governorWord);
		}
	}
	return phrases;
}
 
Example #4
Source File: StanfordExtractorTest.java    From CLAVIN-NERD with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Checks conversion of Stanford NER output format into
 * {@link com.bericotech.clavin.resolver.ClavinLocationResolver}
 * input format.
 *
 * @throws IOException if the classifier properties resource cannot be read or closed
 */
@Test
public void testConvertNERtoCLAVIN() throws IOException {
    Properties mp = new Properties();
    // Properties.load() leaves the stream open, so close it explicitly once loading is done.
    try (InputStream mpis = this.getClass().getClassLoader().getResourceAsStream("models/english.all.3class.distsim.prop")) {
        mp.load(mpis);
    }
    AbstractSequenceClassifier<CoreMap> namedEntityRecognizer =
            CRFClassifier.getJarClassifier("/models/english.all.3class.distsim.crf.ser.gz", mp);

    String text = "I was born in Springfield and grew up in Boston.";
    List<Triple<String, Integer, Integer>> entitiesFromNER = namedEntityRecognizer.classifyToCharacterOffsets(text);

    // Both place names must survive the conversion with their text and character offset intact.
    List<LocationOccurrence> locationsForCLAVIN = convertNERtoCLAVIN(entitiesFromNER, text);
    assertEquals("wrong number of entities", 2, locationsForCLAVIN.size());
    assertEquals("wrong text for first entity", "Springfield", locationsForCLAVIN.get(0).getText());
    assertEquals("wrong position for first entity", 14, locationsForCLAVIN.get(0).getPosition());
    assertEquals("wrong text for second entity", "Boston", locationsForCLAVIN.get(1).getText());
    assertEquals("wrong position for second entity", 41, locationsForCLAVIN.get(1).getPosition());
}
 
Example #5
Source File: StanfordRNNDParser.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the CAS into a CoreNLP annotation, dependency-parses every sentence,
 * stores the resulting graph as the sentence's enhanced dependencies and writes
 * the dependencies back to the CAS. The graph and its sorted edges are also
 * printed for each sentence.
 *
 * @param jCas the CAS to process
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  mappingProvider.configure(jCas.getCas());

  Annotation document = new DKPro2CoreNlp().convert(jCas, new Annotation());
  for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
    GrammaticalStructure structure = parser.predict(sentence);
    SemanticGraph parsed = SemanticGraphFactory.makeFromTree(
        structure, SemanticGraphFactory.Mode.CCPROCESSED, GrammaticalStructure.Extras.MAXIMAL, null);
    parsed.prettyPrint();

    // The converted graph, not the raw parser output, is what gets stored on the sentence.
    SemanticGraph converted = semanticGraphUniversalEnglishToEnglish(parsed);
    sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, converted);
    for (SemanticGraphEdge edge : converted.edgeListSorted()) {
      System.out.println(edge);
    }
  }

  convertDependencies(jCas, document, true);
}
 
Example #6
Source File: Chapter5.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 6 votes vote down vote up
/**
 * Tags {@code theSentence} with a tokenize/ssplit/pos pipeline and prints each
 * sentence as {@code word/TAG} pairs, followed by the XML and pretty-printed
 * renderings of the annotated document.
 */
private static void usingStanfordPOSTagger() {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos");
    props.setProperty("pos.model", "C:\\Current Books in Progress\\NLP and Java\\Models\\english-caseless-left3words-distsim.tagger");
    // Properties.getProperty() returns null for values that are not Strings, so the
    // limit has to be stored as a String; put("pos.maxlen", 10) was silently ignored.
    props.setProperty("pos.maxlen", "10");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(theSentence);
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.print(word + "/" + pos + " ");
        }
        System.out.println();

        // NOTE(review): the whole document is printed once per sentence -- confirm this is intended.
        try {
            pipeline.xmlPrint(document, System.out);
            pipeline.prettyPrint(document, System.out);
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}
 
Example #7
Source File: IntelKBPSemgrexExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the first relation type whose rules match the input sentence.
 * <p>
 * A relation type is tried only if it has rules, its entity type equals the
 * input's subject type, and the input's object type is among its valid named
 * entity labels. The rules are matched against the enhanced++ dependencies
 * first and against the alternative dependencies only if that fails.
 *
 * @param input the sentence plus subject/object information to classify
 * @return the relation's canonical name, or {@code NO_RELATION}, always paired with 1.0
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
    for (RelationType rel : RelationType.values()) {
        boolean applicable = rules.containsKey(rel)
                && rel.entityType == input.subjectType
                && rel.validNamedEntityLabels.contains(input.objectType);
        if (!applicable) {
            continue;
        }

        Collection<SemgrexPattern> patterns = rules.get(rel);
        CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);

        if (matches(sentence, patterns, input,
                sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class))) {
            return Pair.makePair(rel.canonicalName, 1.0);
        }
        if (matches(sentence, patterns, input,
                sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class))) {
            return Pair.makePair(rel.canonicalName, 1.0);
        }
    }

    return Pair.makePair(NO_RELATION, 1.0);
}
 
Example #8
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Annotates {@code text} and returns the parse tree of its first sentence.
 *
 * @param text the text to parse
 * @return the first sentence's tree, or {@code null} when the text yields no sentences
 */
public Tree getParseTree (String text) {
    Annotation document = new Annotation(text);
    pipeline_lemma.annotate(document);

    // Only the first sentence is ever consulted; later sentences are ignored.
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    if (sentences.isEmpty()) {
        return null;
    }
    return sentences.get(0).get(TreeAnnotation.class);
}
 
Example #9
Source File: CoreNLPCache.java    From phrasal with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Loads serialized CoreNLP annotations from a file into {@code annotationMap}.
 * <p>
 * Each sentence is stored under its line number minus one (line numbers are
 * 1-indexed, map keys 0-indexed). Any previous map is replaced.
 *
 * @param filename file holding a serialized {@code Annotation}
 * @return the largest line number seen plus one (1 when there are no sentences)
 * @throws RuntimeException if the annotation carries no sentence list
 */
public static int loadSerialized(String filename) {
  Annotation loaded = IOTools.deserialize(filename, Annotation.class);
  List<CoreMap> sentences = loaded.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null) {
    throw new RuntimeException("Unusable annotation (no sentences) in " + filename);
  }

  annotationMap = new HashMap<Integer,CoreMap>(sentences.size());
  int highestLineId = 0;
  for (CoreMap sentence : sentences) {
    int lineId = sentence.get(CoreAnnotations.LineNumberAnnotation.class);
    annotationMap.put(lineId - 1, sentence);
    highestLineId = Math.max(highestLineId, lineId);
  }
  return highestLineId + 1;
}
 
Example #10
Source File: SentimentAnalyzer.java    From hazelcast-jet-demos with Apache License 2.0 6 votes vote down vote up
/**
 * Averages, over all sentences not predicted as neutral (class 2), the
 * prediction entry selected by the overall sentiment.
 * <p>
 * The entry is 0 for {@code overallSentiment < -0.5}, 1 for {@code < 0.0},
 * 3 for {@code < 0.5} and 4 otherwise. When no sentence is counted the
 * division is 0.0 / 0 and the result is {@code NaN}.
 *
 * @param sentences        sentences carrying a sentiment-annotated tree
 * @param overallSentiment overall sentiment used to pick the prediction entry
 * @return mean of the selected prediction entry over the non-neutral sentences
 */
private double getScore(List<CoreMap> sentences, double overallSentiment) {
    int column;
    if (overallSentiment < -0.5) {
        column = 0;   // very negative
    } else if (overallSentiment < 0.0) {
        column = 1;   // negative
    } else if (overallSentiment < 0.5) {
        column = 3;   // positive
    } else {
        column = 4;   // very positive
    }

    double total = 0;
    int counted = 0;
    for (CoreMap sentence : sentences) {
        Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        if (RNNCoreAnnotations.getPredictedClass(tree) != 2) {
            total += RNNCoreAnnotations.getPredictions(tree).get(column);
            counted++;
        }
    }
    return total / counted;
}
 
Example #11
Source File: CorefTool.java    From Criteria2Query with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a fixed eligibility-criteria text with a mention + coref pipeline
 * and prints every coreference chain, followed by the mentions of each sentence.
 */
public void extractCoref() {
	String criteria = "Subjects with hypothyroidism who are on stable treatment for 3 months prior to screening are required to have TSH and free thyroxine (FT4) obtained. If the TSH value is out of range, but FT4 is normal, such cases should be discussed directly with the JRD responsible safety physician before the subject is enrolled. If the FT4 value is out of range, the subject is not eligible.";
	Annotation doc = new Annotation(criteria);

	Properties config = new Properties();
	config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
	StanfordCoreNLP nlp = new StanfordCoreNLP(config);
	nlp.annotate(doc);

	// Document-level output: one line per coreference chain.
	System.out.println("---");
	System.out.println("coref chains");
	for (CorefChain chain : doc.get(CorefCoreAnnotations.CorefChainAnnotation.class).values()) {
		System.out.println("\t" + chain);
	}

	// Sentence-level output: one section per sentence, one line per mention.
	for (CoreMap sent : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
		System.out.println("---");
		System.out.println("mentions");
		for (Mention mention : sent.get(CorefCoreAnnotations.CorefMentionsAnnotation.class)) {
			System.out.println("\t" + mention);
		}
	}
}
 
Example #12
Source File: InteractiveDriver.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Interactive console loop: every line read at the {@code sentence> } prompt is
 * annotated, and each KBP triple whose relation gloss is one of
 * {@code per:title}, {@code per:employee_of} or {@code org:top_members/employees}
 * is printed to standard error.
 *
 * @param args command-line arguments, converted to pipeline properties
 * @throws IOException declared for the console loop
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
    props.setProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Set<String> interested = Stream.of("per:title", "per:employee_of", "org:top_members/employees")
            .collect(Collectors.toSet());

    IOUtils.console("sentence> ", line -> {
        Annotation ann = new Annotation(line);
        pipeline.annotate(ann);
        for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
            sentence.get(CoreAnnotations.KBPTriplesAnnotation.class).stream()
                    .filter(triple -> interested.contains(triple.relationGloss()))
                    .forEach(triple -> System.err.println(triple));
        }
    });
}
 
Example #13
Source File: RelationExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code sentence} and collects its {@code Work_For} relations.
 * <p>
 * For each such relation (which is also printed), the value of its last
 * {@code PEOPLE} argument is mapped to the value of its last
 * {@code ORGANIZATION} argument; a missing side is the empty string.
 *
 * @param sentence the text to analyze
 * @return map from person value to organization value
 */
public static HashMap<String, String> extract(String sentence) {
    Annotation doc = new Annotation(sentence);
    pipeline.annotate(doc);
    r.annotate(doc);

    HashMap<String, String> employerByPerson = new HashMap<String, String>();
    for (CoreMap s : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
      List<RelationMention> relations = s.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
      for (RelationMention relation : relations) {
        if (!relation.getType().equals("Work_For")) {
          continue;
        }
        System.out.println(relation);

        String organization = "";
        String people = "";
        for (EntityMention argument : relation.getEntityMentionArgs()) {
          if (argument.getType().equals("ORGANIZATION")) {
            organization = argument.getValue();
          }
          if (argument.getType().equals("PEOPLE")) {
            people = argument.getValue();
          }
        }
        employerByPerson.put(people, organization);
      }
    }
    return employerByPerson;
}
 
Example #14
Source File: StanfordCoreNLPTest.java    From java_in_examples with Apache License 2.0 6 votes vote down vote up
/**
 * Annotates a fixed passage and prints the sentiment class of every sentence,
 * one {@code sentiment<TAB>sentence} line each.
 *
 * @param s unused
 */
public static void main(String[] s) {
    Properties props = new Properties();
    // The loop below reads SentimentClass, which is only populated when the
    // "sentiment" annotator runs; without it every sentence prints "null".
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    // read some text in the text variable
    String text = "\"But I do not want to go among mad people,\" Alice remarked.\n" +
            "\"Oh, you can not help that,\" said the Cat: \"we are all mad here. I am mad. You are mad.\"\n" +
            "\"How do you know I am mad?\" said Alice.\n" +
            "\"You must be,\" said the Cat, \"or you would not have come here.\" This is awful, bad, disgusting";

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
        System.out.println(sentiment + "\t" + sentence);
    }
}
 
Example #15
Source File: Minimization.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Removes the given words from the phrase and records them as dropped.
 * <p>
 * The list of matched words is emptied first. The edges obtained for the words
 * to remove and the words themselves are then registered on the phrase as
 * dropped, the words are removed from the phrase, and finally the list of
 * words to remove is emptied as well.
 *
 * @param remWords   words to remove from the phrase; empty on return
 * @param matchWords matched words; empty on return
 */
public void dropWords(List<CoreMap> remWords, List<CoreMap> matchWords){
    matchWords.clear();

    // Keep a record of what is about to be removed: first the edges, then the words.
    this.phrase.addDroppedEdges(CoreNLPUtils.listOfCoreMapWordsToParentEdges(this.sg, remWords));
    this.phrase.addDroppedWords(CoreNLPUtils.getWordListFromCoreMapList(remWords));

    // Perform the removal and reset the work list.
    this.phrase.removeCoreLabelWordsFromList(remWords);
    remWords.clear();
}
 
Example #16
Source File: KBPModel.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code doc} and collects the KBP triples whose trimmed relation
 * gloss is {@code per:title}, {@code per:employee_of} or
 * {@code org:top_members/employees}.
 *
 * @param doc the text to analyze
 * @return map from each selected triple to the string form of its sentence
 */
public static HashMap<RelationTriple, String> extract(String doc) {
    Annotation ann = new Annotation(doc);
    pipeline.annotate(ann);

    HashMap<RelationTriple, String> relations = new HashMap<RelationTriple, String>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        for (RelationTriple triple : sentence.get(CoreAnnotations.KBPTriplesAnnotation.class)) {
            String gloss = triple.relationGloss().trim();
            boolean wanted = gloss.equals("per:title")
                    || gloss.equals("per:employee_of")
                    || gloss.equals("org:top_members/employees");
            if (wanted) {
                relations.put(triple, sentence.toString());
            }
        }
    }
    return relations;
}
 
Example #17
Source File: Minimization.java    From minie with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Given a phrase, if it contains NERs, make a dictionary minimization around them.
 * <p>
 * For every match of the {@code REGEX.T_RB_JJ_NER} token pattern in the phrase, the
 * adjectives and adverbs that carry no NER type are added to {@code remWords}, and
 * {@code dropWordsNotFoundInDict} is invoked on the match. Afterwards the safe
 * minimization is run.
 *
 * @param remWords   work list of words to remove; words are appended to it here
 * @param matchWords work list of matched words; reassigned to the nodes of each match,
 *                   so the list object passed in by the caller is not the one filled
 **/
public void namedEntityDictionaryMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    // Pattern (by name): adverbs/adjectives around a named entity -- see REGEX.T_RB_JJ_NER
    // for the exact expression.
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_JJ_NER);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
    while (this.tMatcher.find()){
        // From here on matchWords refers to the matcher's nodes, not the caller's list.
        matchWords = tMatcher.groupNodes();

        for (CoreMap cm: matchWords){
            // Work on a copy; a missing lemma is filled in with the word itself.
            CoreLabel cl = new CoreLabel(cm);
            if (cl.lemma() == null) cl.setLemma(cl.word());

            // Adjectives and adverbs that are not part of a named entity are candidates for removal
            if ((CoreNLPUtils.isAdj(cl.tag()) || CoreNLPUtils.isAdverb(cl.tag()))
                    && cl.ner().equals(NE_TYPE.NO_NER)){
                remWords.add(cm);
            }
        }

        // Drop the words not found in dict.
        this.dropWordsNotFoundInDict(matchWords, remWords);
    }

    // Do the safe minimization (receives the nodes of the last match, or the caller's
    // list when the pattern never matched)
    this.namedEntitySafeMinimization(remWords, matchWords);
}
 
Example #18
Source File: ComparisonUtils.java    From NLIWOD with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Collects the tokens of {@code question} whose part-of-speech tag starts with {@code tag}
 * (JJR for comparatives, JJS for superlatives).
 * <p>
 * Each hit is the token's string form cut at its last {@code '-'}
 * (presumably a "word-index" rendering -- confirm against the pipeline output).
 *
 * @param question String to retrieve words from.
 * @param tag tag prefix to look for, e.g. JJR or JJS.
 * @return List of the retrieved words, or {@code null} if either argument is {@code null}.
 */
private ArrayList<String> getWords(String question, String tag) {
	if (question == null || tag == null) {
		return null;
	}

	Annotation annotated = new Annotation(question);
	PIPELINE.annotate(annotated);
	List<CoreMap> sentences = annotated.get(CoreAnnotations.SentencesAnnotation.class);

	ArrayList<String> matches = new ArrayList<String>();
	for (CoreMap sentence : sentences) {
		for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
			if (!token.tag().startsWith(tag)) {
				continue;
			}
			String rendered = token.toString();
			matches.add(rendered.substring(0, rendered.lastIndexOf("-")));
		}
	}
	return matches;
}
 
Example #19
Source File: RegexNerTest.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Annotates {@code doc} and returns the named-entity tag of every token.
 *
 * @param doc the text to analyze
 * @return the NER labels of all tokens, in document order (one entry per token)
 */
public static List<String> extractNER(String doc){
    Annotation document = new Annotation(doc);
    pipeline.annotate(document);

    List<String> result = new ArrayList<String>();
    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
        // a CoreLabel is a CoreMap with additional token-specific methods;
        // only the NER label is needed here
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            result.add(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
        }
    }
    return result;
}
 
Example #20
Source File: KBPSemgrexExtractor.java    From InformationExtraction with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Finds the first relation type whose Semgrex rules match the input sentence.
 * <p>
 * Relation types without rules, with an entity type different from the input's
 * subject type, or whose valid named entity labels do not contain the input's
 * object type are skipped. The enhanced++ dependencies are tried before the
 * alternative dependencies.
 *
 * @param input the sentence plus subject/object information to classify
 * @return the relation's canonical name, or {@code NO_RELATION}, always paired with 1.0
 */
@Override
public Pair<String, Double> classify(KBPInput input) {
  for (RelationType candidate : RelationType.values()) {
    if (!rules.containsKey(candidate)
        || candidate.entityType != input.subjectType
        || !candidate.validNamedEntityLabels.contains(input.objectType)) {
      continue;
    }

    Collection<SemgrexPattern> candidateRules = rules.get(candidate);
    CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);

    boolean matched = matches(sentence, candidateRules, input,
        sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class));
    if (!matched) {
      // Second chance: the alternative dependency graph.
      matched = matches(sentence, candidateRules, input,
          sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class));
    }
    if (matched) {
      return Pair.makePair(candidate.canonicalName, 1.0);
    }
  }

  return Pair.makePair(NO_RELATION, 1.0);
}
 
Example #21
Source File: Chapter8.java    From Natural-Language-Processing-with-Java-Second-Edition with MIT License 5 votes vote down vote up
/**
 * Annotates four one-sentence documents in a single batch call, prints the
 * pipeline's timing information, and then prints word and POS tag for every
 * token of the second document.
 */
private static void usingStanfordPipelineParallel() {
    String path = "C:\\Current Books\\NLP and Java\\Downloads\\stanford-ner-2014-10-26\\classifiers";
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    props.put("ner.model", path + "/english.muc.7class.distsim.crf.ser.gz");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation robber = new Annotation("The robber took the cash and ran.");
    Annotation policeman = new Annotation("The policeman chased him down the street.");
    Annotation passerby = new Annotation("A passerby, watching the action, tripped the thief as he passed by.");
    Annotation ending = new Annotation("They all lived happily everafter, except for the thief of course.");

    ArrayList<Annotation> documents = new ArrayList<Annotation>();
    documents.add(robber);
    documents.add(policeman);
    documents.add(passerby);
    documents.add(ending);

    // Typed as Iterable so the batch overload of annotate() is selected.
    Iterable<Annotation> batch = documents;
    pipeline.annotate(batch);

    System.out.println("Total time: " + pipeline.timingInformation());

    // Only the second document is inspected.
    for (CoreMap sentence : policeman.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            System.out.println("Word: " + word + " POS Tag: " + pos);
        }
    }
}
 
Example #22
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Annotates {@code text} and returns the lemmas of all its tokens, separated
 * by single spaces.
 * <p>
 * A progress counter is printed after every 100th sentence.
 *
 * @param text the text to lemmatize
 * @return the space-separated lemmas, or the empty string when the text
 *         yields no tokens
 */
public String getBaseFormOfPattern (String text) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);
    // run all Annotators on this text
    pipeline_lemma.annotate(document);

    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // StringBuilder avoids re-copying the accumulated text on every token.
    StringBuilder lemmas = new StringBuilder();
    int count = 0;
    for (CoreMap sentence : sentences) {
      // a CoreLabel is a CoreMap with additional token-specific methods
      for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        // this is the base form (lemma) of the token
        lemmas.append(token.getString(LemmaAnnotation.class)).append(' ');
      }
      count++;
      if (count % 100 == 0) {
        System.out.println(count);
      }
    }

    // Without tokens there is no trailing separator to strip; cutting one off
    // anyway used to throw StringIndexOutOfBoundsException.
    if (lemmas.length() == 0) {
      return "";
    }
    lemmas.setLength(lemmas.length() - 1);
    return lemmas.toString();
}
 
Example #23
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * POS-tags {@code sentence} and wraps every token in a {@code Word}.
 * <p>
 * Each {@code Word} is built from the base form of the lower-cased token text,
 * the original token text, its POS tag and its 1-based position.
 *
 * @param sentence the sentence to tag
 * @return one {@code Word} per token, in token order
 */
public Word[] getTaggedWords (String sentence) {
	CoreMap tagged = getPOS(sentence);
	List<CoreLabel> tokens = tagged.get(TokensAnnotation.class);

	Word[] words = new Word[tokens.size()];
	for (int i = 0; i < words.length; i++) {
		CoreLabel token = tokens.get(i);
		String text = token.get(TextAnnotation.class);
		String pos = token.get(PartOfSpeechAnnotation.class);
		// Positions handed to Word start at 1, array indices at 0.
		words[i] = new Word(getBaseFormOfPattern(text.toLowerCase()), text, pos, i + 1);
	}
	return words;
}
 
Example #24
Source File: Extract.java    From phrases with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the patterns of one sentence from its collapsed, CC-processed
 * dependencies.
 * <p>
 * Primary patterns are combined with themselves once; the growing combined
 * list is then combined with the primary patterns two more times before the
 * result is pruned.
 *
 * @param sentence the annotated sentence
 * @return the pruned set of combined patterns
 */
private HashSet<Pattern> ExtractSentencePatterns(CoreMap sentence) {
    SemanticGraph dependencies =
            sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    List<Pattern> primary = ExtractPrimaryPatterns(dependencies.typedDependencies());

    List<Pattern> combined = ExtractCombinedPatterns(primary, primary);
    for (int round = 0; round < 2; round++) {
        combined.addAll(ExtractCombinedPatterns(combined, primary));
    }

    return PruneCombinedPatterns(combined);
}
 
Example #25
Source File: TemporalNormalize.java    From Criteria2Query with Apache License 2.0 5 votes vote down vote up
/**
 * Annotates {@code text} with the shared pipeline and derives a day count from
 * the time expressions found in it.
 * <p>
 * Every time expression overwrites the value computed for the previous one, so
 * the result belongs to the last expression; it is 0 when there is none.
 * Diagnostic output is printed to standard out along the way.
 *
 * @param text the text to analyze
 * @return {@code total * number} truncated to an int, or 365 in the special
 *         case {@code total == 30 && number == 12}
 */
public Integer temporalNormalizeforNumberUnit(String text) {
	Annotation annotation = new Annotation(text);
	// The current time serves as the document date for the annotation.
	annotation.set(CoreAnnotations.DocDateAnnotation.class, SUTime.getCurrentTime().toString());
	pipeline.annotate(annotation);

	System.out.println(annotation.get(CoreAnnotations.TextAnnotation.class));

	List<CoreMap> timexAnnsAll = annotation.get(TimeAnnotations.TimexAnnotations.class);
	Integer days=0;
	for (CoreMap cm : timexAnnsAll) {

		// Debug output: the expression, its character span, its temporal and its value.
		List<CoreLabel> tokens = cm.get(CoreAnnotations.TokensAnnotation.class);
		 System.out.println(cm + " [from char offset " + tokens.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class) + " to "
		 + tokens.get(tokens.size() -1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class) + ']'
		 + " --> " + cm.get(TimeExpression.Annotation.class).getTemporal());
		 System.out.println("!!!!-->" +cm.get(TimeExpression.Annotation.class).getValue());
		 System.out.println("---final result---");
		String tstr = cm.get(TimeExpression.Annotation.class).getTemporal().toString();
		System.out.println("tstr"+tstr);
		// When the string contains ')', keep only the part from the first 'P' up to
		// (excluding) the last character.
		// NOTE(review): if ')' is present but 'P' is not, k is -1 and substring throws -- confirm inputs.
		int k=tstr.indexOf("P");
		if(tstr.indexOf(")")!=-1){
			tstr=tstr.substring(k, tstr.length()-1);
		}
		double total = TemporalConvert.convertTodayUnit(tstr);
		double number = TemporalConvert.recognizeNumbersFormSUTime(tstr);
		System.out.println("t="+total);
		System.out.println("n="+number);
		// System.out.println("unit=" + total);
		// Presumably 12 units of 30 days are meant as one year rather than 360 days -- TODO confirm.
		if(total==30 && number==12){
			days=365;
			
		}else{
			days=(int) (total * number);
		}
	}
	return days;
	
}
 
Example #26
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Given a CoreNLP pipeline and an input sentence, generate the dependency parse and return
 * it after conversion by {@code semanticGraphUniversalEnglishToEnglish}.
 * <p>
 * If the input is split into several sentences, only the basic dependencies of the last
 * one are used; if it yields no sentence at all, {@code null} is handed to the conversion.
 *
 * @param pipeline - CoreNLP pipeline
 * @param snt - input sentence
 * @return dependency parse in SemanticGraph object
 */
public static SemanticGraph parse(StanfordCoreNLP pipeline, String snt) {
    Annotation document = new Annotation(snt);
    pipeline.annotate(document);

    // A CoreMap is a sentence with annotations
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    SemanticGraph lastGraph = sentences.isEmpty()
            ? null
            : sentences.get(sentences.size() - 1).get(BasicDependenciesAnnotation.class);

    return semanticGraphUniversalEnglishToEnglish(lastGraph);
}
 
Example #27
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Given a list of words (as core maps), return the same words as indexed word objects.
 * Each core map is copied into a new {@code CoreLabel}, which is wrapped in an {@code IndexedWord}.
 * @param cmList: list of words (e.g. [She, is, pretty])
 * @return list of words (as IndexedWord), in the order of the input
 */
public static ObjectArrayList<IndexedWord> listOfCoreMapWordsToIndexedWordList(List<CoreMap> cmList){
    ObjectArrayList<IndexedWord> indexedWords = new ObjectArrayList<>();
    for (int i = 0; i < cmList.size(); i++){
        CoreLabel label = new CoreLabel(cmList.get(i));
        indexedWords.add(new IndexedWord(label));
    }
    return indexedWords;
}
 
Example #28
Source File: JsonPipeline.java    From tac2015-event-detection with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds the token values and token character spans of {@code sentence} to {@code sent_info}.
 * <p>
 * The values are stored under {@code "tokens"}; the spans, each a two-element
 * list of begin and end position, under {@code "char_offsets"}.
 *
 * @param sent_info map receiving the two entries
 * @param sentence  the tokenized sentence
 */
static void addTokenBasics(Map<String,Object> sent_info, CoreMap sentence) {
	List<String> texts = Lists.newArrayList();
	List<List<Integer>> offsets = Lists.newArrayList();
	for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
		List<Integer> beginEnd = Lists.newArrayList(token.beginPosition(), token.endPosition());
		offsets.add(beginEnd);
		texts.add(token.value());
	}
	sent_info.put("tokens", texts);
	sent_info.put("char_offsets", offsets);
}
 
Example #29
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Copies every core map of the given list into a new {@code CoreLabel}.
 * @param coreMapList the core maps to copy
 * @return the core labels, in the order of the input
 */
public static ObjectArrayList<CoreLabel> getCoreLabelListFromCoreMapList(ObjectArrayList<CoreMap> coreMapList){
    ObjectArrayList<CoreLabel> labels = new ObjectArrayList<>();
    for (int i = 0; i < coreMapList.size(); i++){
        labels.add(new CoreLabel(coreMapList.get(i)));
    }
    return labels;
}
 
Example #30
Source File: CoreNLPUtils.java    From minie with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts every core map of the given list into an {@code IndexedWord}
 * (via a new {@code CoreLabel} copy of the core map).
 * @param coreMapList the core maps to convert
 * @return the indexed words, in the order of the input
 */
public static ObjectArrayList<IndexedWord> getWordListFromCoreMapList(List<CoreMap> coreMapList){
    ObjectArrayList<IndexedWord> words = new ObjectArrayList<>();
    for (int i = 0; i < coreMapList.size(); i++){
        CoreLabel label = new CoreLabel(coreMapList.get(i));
        words.add(new IndexedWord(label));
    }
    return words;
}