edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation Java Examples

The following examples show how to use edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation. You can vote up the examples you find useful or vote down those you don't, and you can visit the original project or source file by following the links above each example. You may also check out the related API usage examples in the sidebar.
Example #1
Source File: CoreNLP.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Runs the annotation pipeline on the given text and returns the
 * constituency parse tree of the first sentence.
 *
 * @param text raw input text to annotate
 * @return the parse tree of the first sentence, or {@code null} if the
 *         text yields no sentences
 */
public Tree getParseTree (String text) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);
    
    // run all Annotators on this text
    pipeline_lemma.annotate(document);
    
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    
    // Guard against empty/unsplittable input: the original for-loop only
    // ever returned the first sentence's tree, so do that explicitly.
    if (sentences == null || sentences.isEmpty()) {
        return null;
    }
    return sentences.get(0).get(TreeAnnotation.class);
}
 
Example #2
Source File: Trees.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Annotates the given text with the full pipeline and returns its
 * sentences, each carrying per-sentence annotations (parse tree,
 * dependencies, tokens, ...).
 *
 * @param text raw input text to annotate
 * @return the annotated sentences of the document
 */
public static List<CoreMap> parse(String text) {
    
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);
    
    // run all Annotators on this text
    pipeline.annotate(document);
    
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    // NOTE(review): the original built local `trees`/`dependencies` lists and
    // fetched a SemanticGraph per sentence, but none of that was returned or
    // used — removed as dead code. The sentences themselves still carry the
    // TreeAnnotation and dependency annotations for callers that need them.
    return document.get(SentencesAnnotation.class);
}
 
Example #3
Source File: CoreNLPParser.java    From Heracles with GNU General Public License v3.0 5 votes vote down vote up
@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit) {
		// Build a parse-only CoreNLP pipeline (tokenization etc. is
		// reconstructed from the existing span annotations below).
		Properties prop1 = new Properties();
		prop1.setProperty("annotators", "parse");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
		for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){

			// Rebuild a Stanford Annotation from the span, keeping a map from
			// token index to Word so tree nodes can be linked back to words.
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
			
			// BUG FIX: the original printed the null annotation and then still
			// called pipeline.annotate(a), which is a guaranteed NPE. Skip the
			// span instead so the remaining spans are still processed.
			if (a == null){
				System.out.println(a);
				continue;
			}
			pipeline.annotate(a);
			for (CoreMap sentence : a.get(SentencesAnnotation.class)){
				//per sentence, check the syntax tree
				Tree tree = sentence.get(TreeAnnotation.class);
				
				try {
					analyzeTree(tree, span, wordIndex);
				} catch (IllegalSpanException e) {
					// A malformed span is not fatal for the dataset as a
					// whole: report it and continue with the next sentence.
					e.printStackTrace();
				}
				
			}
			
		}

	}
 
Example #4
Source File: DistortionProbability.java    From phrasal with GNU General Public License v3.0 5 votes vote down vote up
@Override
public void initialize(int sourceInputId,
    Sequence<IString> source) {
    // Look up the cached annotation for this source input, take its parse
    // tree, and keep the sequence of POS tags (the tree's pre-terminal yield)
    // for later distortion scoring.
    final Tree tree = CoreNLPCache.get(sourceInputId).get(TreeAnnotation.class);
    this.posTags = tree.preTerminalYield();
}
 
Example #5
Source File: Phrase.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the constituency parse tree of every sentence in the phrase,
 * skipping sentences the parser could not handle (null trees).
 */
private static List<Tree> _trees(Phrase p) {
	// Memoized annotation holding this phrase's text.
    Annotation annotated = p.memo(Phrase.coreNLP);
    
    try{
    	// Run the full constituency parse over the phrase.
    	constituency_parse_pipeline.annotate(annotated);
	} catch (IllegalArgumentException | NullPointerException ex) {
		/*
		 *  On extremely rare occasions (< 0.00000593% of passages) the
		 *  pipeline throws, e.g.:
		 *  
		 *  Exception in thread "main" java.lang.IllegalArgumentException:
		 *  No head rule defined for SYM using class edu.stanford.nlp.trees.SemanticHeadFinder in SYM-10
		 *  
		 *  Somewhat more often, it throws:
		 *  Exception in thread "main" java.lang.NullPointerException
		 *  at edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder.findHead(RuleBasedCorefMentionFinder.java:276)
		 *  
		 *  Either failure is fatal for this passage but harmless for the
		 *  index as a whole, so the exception is deliberately ignored and
		 *  the sentences simply yield null trees below.
		 */
	}
	// Keep only the sentences that actually received a parse tree.
	return p.memo(Phrase.sentences)
			.stream()
			.map(s -> s.get(TreeAnnotation.class))
			.filter(t -> t != null)
			.collect(toList());
}
 
Example #6
Source File: CoreNLPToJSON.java    From phrasal with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Process an English text file: run CoreNLP over each line (which must be
 * exactly one sentence), align CoreNLP's tokenization with the phrasal
 * tokenizer, and emit one JSON SourceDocument on stdout.
 * 
 * @param args args[0] = input text file; args[1] (optional) = InputProperties string
 * @throws IOException if the input file cannot be read
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.printf("Usage: java %s file [inputproperties_str] > json_output%n", CoreNLPToJSON.class.getName());
    System.exit(-1);
  }
  String textFile = args[0];
  InputProperties inputProperties = args.length > 1 ? InputProperties.fromString(args[1]) : new InputProperties();

  StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties);
  
  // Configure tokenizer
  EnglishPreprocessor preprocessor = new EnglishPreprocessor(true);
  
  // Use a map with ordered keys so that the output is ordered by segmentId.
  Map<Integer,SourceSegment> annotations = new TreeMap<Integer,SourceSegment>();
  int linesProcessed = 0;
  LineNumberReader reader = IOTools.getReaderFromFile(textFile);
  try {
    for (String line; (line = reader.readLine()) != null;) {
      Annotation annotation = coreNLP.process(line);
      List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
      // Each input line must be a single sentence; anything else indicates a
      // sentence-splitting mismatch with the upstream segmentation.
      if (sentences.size() != 1) {
        throw new RuntimeException("Sentence splitting on line: " + String.valueOf(reader.getLineNumber()));
      }
      CoreMap sentence = sentences.get(0);
      Tree tree = sentence.get(TreeAnnotation.class);
      tree.indexLeaves();
      int[] chunkVector = getChunkVector(tree);
      List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
      int numTokens = tokens.size();
      // Tokenizations must agree so token-level annotations line up 1:1.
      SymmetricalWordAlignment alignment = preprocessor.processAndAlign(line);
      if (alignment.e().size() != numTokens) {
        throw new RuntimeException(String.format("Tokenizer configurations differ: %d/%d", alignment.e().size(), numTokens));
      }
      SourceSegment segment = new SourceSegment(numTokens);
      segment.layoutSpec.addAll(makeLayoutSpec(alignment));
      segment.inputProperties = inputProperties.toString();
      for (int j = 0; j < numTokens; ++j) {
        CoreLabel token = tokens.get(j);
        String word = token.get(TextAnnotation.class);
        segment.tokens.add(unescape(word));
        String pos = mapPOS(token.get(PartOfSpeechAnnotation.class));
        segment.pos.add(pos);
        String ne = token.get(NamedEntityTagAnnotation.class);
        segment.ner.add(ne);
        segment.chunkVector[j] = chunkVector[j];
      }
      // Keys are 0-based segment ids, so use line number - 1.
      annotations.put(reader.getLineNumber()-1, segment);
    }
    linesProcessed = reader.getLineNumber();
  } finally {
    // FIX: close the reader even when parsing throws (it previously leaked).
    reader.close();
  }
  System.err.printf("Processed %d sentences%n", linesProcessed);
  
  final SourceDocument jsonDocument = new SourceDocument(textFile, annotations);
  
  // Convert to json
  Gson gson = new Gson();
  String json = gson.toJson(jsonDocument);
  System.out.println(json);
}