edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation Java Exaples

Source File: CoreNLP.java From gAnswer with BSD 3-Clause "New" or "Revised" License

6 votes

public Tree getParseTree (String text) {
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);
    
    // run all Annotators on this text
    pipeline_lemma.annotate(document);
    
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    
    for(CoreMap sentence: sentences) {
    	// this is the parse tree of the current sentence
    	return sentence.get(TreeAnnotation.class);
    }	    
    
    return null;
}

Source File: Trees.java From uncc2014watsonsim with GNU General Public License v2.0

6 votes

public static List<CoreMap> parse(String text) {
    
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);
    
    // run all Annotators on this text
    pipeline.annotate(document);
    
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    List<Tree> trees = new ArrayList<>();
    List<Tree> dependencies = new ArrayList<>();
    
    for(CoreMap sentence: sentences) {
      // this is the parse tree of the current sentence
    	Tree t = sentence.get(TreeAnnotation.class);
    	SemanticGraph graph = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    	trees.add(t);
    }
    return sentences;
}

Source File: CoreNLPParser.java From Heracles with GNU General Public License v3.0

5 votes

@Override
	public void validatedProcess(Dataset dataset, String spanTypeOfSentenceUnit) {
		Properties prop1 = new Properties();
		prop1.setProperty("annotators", "parse");
		StanfordCoreNLP pipeline = new StanfordCoreNLP(prop1, false);
		
		for (Span span : dataset.getSpans(spanTypeOfSentenceUnit)){

			
			HashMap<Integer, Word> wordIndex = new HashMap<>();
			Annotation a = CoreNLPHelper.reconstructStanfordAnnotations(span, wordIndex);
//			Annotation a = new Annotation((String)span.getAnnotations().get("text"));
			
			if (a == null){
				System.out.println(a);
			}
			pipeline.annotate(a);
			for (CoreMap sentence : a.get(SentencesAnnotation.class)){
				//per sentence, check the syntax tree
				Tree tree = sentence.get(TreeAnnotation.class);
//				tree.percolateHeadAnnotations(headFinder);
//				tree.indentedListPrint();
				
				try {
					analyzeTree(tree, span, wordIndex);
				} catch (IllegalSpanException e) {
					// TODO Auto-generated catch block
					e.printStackTrace();
				}
				
			}
			
		}

	}

Source File: DistortionProbability.java From phrasal with GNU General Public License v3.0

5 votes

@Override
public void initialize(int sourceInputId,
    Sequence<IString> source) {
    Tree parseTree = CoreNLPCache.get(sourceInputId).get(TreeAnnotation.class);
    this.posTags = parseTree.preTerminalYield();
    
    
}

Source File: Phrase.java From uncc2014watsonsim with GNU General Public License v2.0

5 votes

private static List<Tree> _trees(Phrase p) {
	// create an empty Annotation just with the given text
    Annotation document = p.memo(Phrase.coreNLP);
    
    try{
    	// Run the full parse on this text
    	constituency_parse_pipeline.annotate(document);
	} catch (IllegalArgumentException | NullPointerException ex) {
		/*
		 *  On extremely rare occasions (< 0.00000593% of passages)
		 *  it will throw an error like the following:
		 *  
		 *  Exception in thread "main" java.lang.IllegalArgumentException:
		 *  No head rule defined for SYM using class edu.stanford.nlp.trees.SemanticHeadFinder in SYM-10
		 *  
		 *  On more frequent occasions, you get the following:
		 *  Exception in thread "main" java.lang.NullPointerException
   		 *  at edu.stanford.nlp.dcoref.RuleBasedCorefMentionFinder.findHead(RuleBasedCorefMentionFinder.java:276)
   		 *  
   		 *  Both of these are fatal for the passage.
   		 *  Neither are a big deal for the index. Forget them.
		 */
	}
	return p.memo(Phrase.sentences)
			.stream()
			.map(s -> s.get(TreeAnnotation.class))
			.filter(Objects::nonNull)
			.collect(toList());
}

Source File: CoreNLPToJSON.java From phrasal with GNU General Public License v3.0

4 votes

/**
 * Process an English text file.
 * 
 * @param args
 * @throws IOException 
 */
public static void main(String[] args) throws IOException {
  if (args.length < 1) {
    System.err.printf("Usage: java %s file [inputproperties_str] > json_output%n", CoreNLPToJSON.class.getName());
    System.exit(-1);
  }
  String textFile = args[0];
  InputProperties inputProperties = args.length > 1 ? InputProperties.fromString(args[1]) : new InputProperties();

  StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties);
  
  // Configure tokenizer
  EnglishPreprocessor preprocessor = new EnglishPreprocessor(true);
  
  // Use a map with ordered keys so that the output is ordered by segmentId.
  Map<Integer,SourceSegment> annotations = new TreeMap<Integer,SourceSegment>();
  LineNumberReader reader = IOTools.getReaderFromFile(textFile);
  for (String line; (line = reader.readLine()) != null;) {
    Annotation annotation = coreNLP.process(line);
    List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
    if (sentences.size() != 1) {
      throw new RuntimeException("Sentence splitting on line: " + String.valueOf(reader.getLineNumber()));
    }
    CoreMap sentence = sentences.get(0);
    Tree tree = sentence.get(TreeAnnotation.class);
    tree.indexLeaves();
    int[] chunkVector = getChunkVector(tree);
    List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
    int numTokens = tokens.size();
    SymmetricalWordAlignment alignment = preprocessor.processAndAlign(line);
    if (alignment.e().size() != numTokens) {
      throw new RuntimeException(String.format("Tokenizer configurations differ: %d/%d", alignment.e().size(), numTokens));
    }
    SourceSegment segment = new SourceSegment(numTokens);
    segment.layoutSpec.addAll(makeLayoutSpec(alignment));
    segment.inputProperties = inputProperties.toString();
    for (int j = 0; j < numTokens; ++j) {
      CoreLabel token = tokens.get(j);
      String word = token.get(TextAnnotation.class);
      segment.tokens.add(unescape(word));
      String pos = mapPOS(token.get(PartOfSpeechAnnotation.class));
      segment.pos.add(pos);
      String ne = token.get(NamedEntityTagAnnotation.class);
      segment.ner.add(ne);
      segment.chunkVector[j] = chunkVector[j];
    }
    annotations.put(reader.getLineNumber()-1, segment);
  }
  reader.close();
  System.err.printf("Processed %d sentences%n", reader.getLineNumber());
  
  final SourceDocument jsonDocument = new SourceDocument(textFile, annotations);
  
  // Convert to json
  Gson gson = new Gson();
  String json = gson.toJson(jsonDocument);
  System.out.println(json);
}

edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation Java Examples