// Java source code of Chapter7

package packt;

import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefChain.CorefMention;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.util.CoreMap;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import opennlp.tools.cmdline.parser.ParserTool;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.parser.Parser;
import opennlp.tools.tokenize.SimpleTokenizer;

public class Chapter7 {

    public static void main(String[] args) {
        usingOpenNLP();
//        usingStanford();
    }

    static Set<String> nounPhrases = new HashSet<>();

    private static void usingOpenNLP() {
        String fileLocation = getModelDir() + "/en-parser-chunking.bin";
        System.out.println(fileLocation);
        try (InputStream modelInputStream = new FileInputStream(fileLocation);) {
            ParserModel model = new ParserModel(modelInputStream);
            Parser parser = ParserFactory.create(model);
            String sentence = "The cow jumped over the moon";
            // Used to demonstrate difference between NER and Parser
            sentence = "He was the last person to see Fred.";

            Parse parses[] = ParserTool.parseLine(sentence, parser, 3);
            for (Parse parse : parses) {
                // First display
                parse.show();
                // Second display
//                parse.showCodeTree();
                // Third display
//                System.out.println("Children");
//                Parse children[] = parse.getChildren();
//                for (Parse parseElement : children) {
//                    System.out.println(parseElement);
//                    System.out.println(parseElement.getText());
//                    System.out.println(parseElement.getType());
//                    Parse tags[] = parseElement.getTagNodes();
//                    System.out.println("Tags");
//                    for (Parse tag : tags) {
//                        System.out.println("[" + tag + "]" + " type: " + tag.getType()
//                                + "  Probability: " + tag.getProb()
//                                + "  Label: " + tag.getLabel());
//                    }
//                }
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    public static String getModelDir() {
        return "C:/Current Books/NLP and Java/Models";
    }

    private static void usingStanfordParsers() {
        usingStanford();
//        usingStanfordLexicalizedParser();
    }

    private static void usingStanford() {
        usingStanfordLexicalizedParser();
//        usingStanfordRelationExtraction();
//        usingStanfordCoreferenceResolution();
//        extractingRelations();
    }

    private static void usingStanfordLexicalizedParser() {
        String parserModel = "C:/Current Books in Progress/NLP and Java/Models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
        LexicalizedParser lexicalizedParser = LexicalizedParser.loadModel(parserModel);

        // This option shows parsing a list of correctly tokenized words
        System.out.println("---First option");
        String[] senetenceArray = {"The", "cow", "jumped", "over", "the", "moon", "."};
        List<CoreLabel> words = Sentence.toCoreLabelList(senetenceArray);

        Tree parseTree = lexicalizedParser.apply(words);
        parseTree.pennPrint();
        System.out.println();

        // This option shows loading and using an explicit tokenizer
        System.out.println("---Second option");
        String sentence = "The cow jumped over the moon.";
        TokenizerFactory<CoreLabel> tokenizerFactory
                = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        Tokenizer<CoreLabel> tokenizer
                = tokenizerFactory.getTokenizer(new StringReader(sentence));
        List<CoreLabel> wordList = tokenizer.tokenize();
        parseTree = lexicalizedParser.apply(wordList);

        TreebankLanguagePack tlp = lexicalizedParser.treebankLanguagePack(); // PennTreebankLanguagePack for English
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parseTree);
        List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
        System.out.println(tdl);
        for (TypedDependency dependency : tdl) {
            System.out.println("Governor Word: [" + dependency.gov() + "] Relation: [" + dependency.reln().getLongName()
                    + "] Dependent Word: [" + dependency.dep() + "]");
        }
        System.out.println();

        // You can also use a TreePrint object to print trees and dependencies
//        System.out.println("---Using TreePrint");
//        TreePrint treePrint = new TreePrint("penn,typedDependenciesCollapsed");
//        treePrint.printTree(parseTree);
//        System.out.println("TreePrint Formats");
//        for (String format : TreePrint.outputTreeFormats) {
//            System.out.println(format);
//        }
//        System.out.println();
    }

    private static void usingStanfordCoreferenceResolution() {
        System.out.println("StanfordCoreferenceResolution");
        String sentence = "He took his cash and she took her change " 
                + "and together they bought their lunch.";
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation annotation = new Annotation(sentence);
        pipeline.annotate(annotation);
        System.out.println("Sentence: " + sentence);
        Map<Integer, CorefChain> corefChainMap = annotation.get(CorefChainAnnotation.class);

        Set<Integer> set = corefChainMap.keySet();
        Iterator<Integer> setIterator = set.iterator();
        while(setIterator.hasNext()) {
            CorefChain corefChain = corefChainMap.get(setIterator.next());
            System.out.println("CorefChain: " + corefChain);
            System.out.print("ClusterId: " + corefChain.getChainID());
            CorefMention mention = corefChain.getRepresentativeMention();
            System.out.println(" CorefMention: " + mention + " Span: [" + mention.mentionSpan + "]");

            List<CorefMention> mentionList = corefChain.getMentionsInTextualOrder();
            Iterator<CorefMention> mentionIterator = mentionList.iterator();
            while(mentionIterator.hasNext()) {
                CorefMention cfm = mentionIterator.next();
                System.out.println("\tMention: " + cfm + " Span: [" + mention.mentionSpan + "]");
                System.out.print("\tMention Type: " + cfm.mentionType + " Gender: " + cfm.gender);
                System.out.println(" Start: " + cfm.startIndex + " End: " + cfm.endIndex);
            }
            System.out.println();
        }
    }

    private static void extractingRelations() {
        String question = "Who is the 32rd president of the United States?";
//        question = "Who was the 32rd president of the United States?";
//        question = "The 32rd president of the United States was who?";
//        question = "The 32rd president is who of the United States?";
//        question = "What was the 3rd President's party?";
//        question = "When was the 12th president inaugurated";
//        question = "Where is the 30th president's home town?";

        String parserModel = "C:/Current Books/NLP and Java/Models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
        LexicalizedParser lexicalizedParser = LexicalizedParser.loadModel(parserModel);

        TokenizerFactory<CoreLabel> tokenizerFactory
                = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        Tokenizer<CoreLabel> tokenizer
                = tokenizerFactory.getTokenizer(new StringReader(question));
        List<CoreLabel> wordList = tokenizer.tokenize();
        Tree parseTree = lexicalizedParser.apply(wordList);

        TreebankLanguagePack tlp = lexicalizedParser.treebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parseTree);
        List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
        System.out.println(tdl);
        for (TypedDependency dependency : tdl) {
            System.out.println("Governor Word: [" + dependency.gov() + "] Relation: [" + dependency.reln().getLongName()
                    + "] Dependent Word: [" + dependency.dep() + "]");
        }

        System.out.println();
        System.out.println("You asked: " + question);
        for (TypedDependency dependency : tdl) {
            if ("nominal subject".equals(dependency.reln().getLongName())
                    && "who".equalsIgnoreCase(dependency.gov().originalText())) {
                System.out.println("Found Who question --- Governor Word: [" + dependency.gov() + "] Relation: [" + dependency.reln().getLongName()
                        + "] Dependent Word: [" + dependency.dep() + "]");
                processWhoQuestion(tdl);
            } else if ("nominal subject".equals(dependency.reln().getLongName())
                    && "what".equalsIgnoreCase(dependency.gov().originalText())) {
                System.out.println("Found What question --- Governor Word: [" + dependency.gov() + "] Relation: [" + dependency.reln().getLongName()
                        + "] Dependent Word: [" + dependency.dep() + "]");
            } else if ("adverbial modifier".equals(dependency.reln().getLongName())
                    && "when".equalsIgnoreCase(dependency.dep().originalText())) {
                System.out.println("Found When question --- Governor Word: [" + dependency.gov() + "] Relation: [" + dependency.reln().getLongName()
                        + "] Dependent Word: [" + dependency.dep() + "]");
            } else if ("adverbial modifier".equals(dependency.reln().getLongName())
                    && "where".equalsIgnoreCase(dependency.dep().originalText())) {
                System.out.println("Found Where question --- Governor Word: [" + dependency.gov() + "] Relation: [" + dependency.reln().getLongName()
                        + "] Dependent Word: [" + dependency.dep() + "]");
            }
        }
    }

    private static void processWhoQuestion(List<TypedDependency> tdl) {
        System.out.println("Processing Who Question");
        List<President> list = createPresidentList();
        for (TypedDependency dependency : tdl) {
            if ("president".equalsIgnoreCase(dependency.gov().originalText())
                    && "adjectival modifier".equals(dependency.reln().getLongName())) {
                String positionText = dependency.dep().originalText();
                int position = getOrder(positionText) - 1;
                System.out.println("The president is " + list.get(position).getName());
            }
        }
    }

    private static int getOrder(String position) {
        String tmp = "";
        int i = 0;
        while (Character.isDigit(position.charAt(i))) {
            tmp += position.charAt(i++);
        }
        return Integer.parseInt(tmp);
    }

    private static List<President> createPresidentList() {
        ArrayList<President> list = new ArrayList<>();
        String line = null;
        try (FileReader reader = new FileReader("PresidentList");
                BufferedReader br = new BufferedReader(reader)) {
            while ((line = br.readLine()) != null) {
                SimpleTokenizer simpleTokenizer = SimpleTokenizer.INSTANCE;
                String tokens[] = simpleTokenizer.tokenize(line);
                String name = "";
                String start = "";
                String end = "";
                int i = 0;
                while (!"(".equals(tokens[i])) {
                    name += tokens[i] + " ";
                    i++;
                }
                start = tokens[i + 1];
                end = tokens[i + 3];
                if (end.equalsIgnoreCase("present")) {
                    end = start;
                }
                list.add(new President(name, Integer.parseInt(start),
                        Integer.parseInt(end)));
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        return list;
    }
}