java source code of DependencyParser

package me.xiaosheng.chnlp.parser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence;
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dependency.IDependencyParser;
import com.hankcs.hanlp.dependency.nnparser.NeuralNetworkDependencyParser;
import com.hankcs.hanlp.seg.common.Term;

public class DependencyParser {
    
    /**
     * 依存句法分析
     * @param segResult 分词结果
     * @return CONLL格式分析结果
     */
    public static CoNLLSentence parse(List<Term> segResult) {
        return parse(segResult, false);
    }
    
    /**
     * 依存句法分析
     * @param segResult 分词结果
     * @param englishTag 使用英语标签
     * @return CONLL格式分析结果
     */
    public static CoNLLSentence parse(List<Term> segResult, boolean englishTag) {
        IDependencyParser parser = new NeuralNetworkDependencyParser();
        if (englishTag)
            parser.enableDeprelTranslator(false);
        return parser.parse(segResult);
    }
    
    /**
     * 依存句法分析
     * @param sentence 句子
     * @return CONLL格式分析结果
     */
    public static CoNLLSentence parse(String sentence) {
        return parse(sentence, false);
    }
    
    /**
     * 依存句法分析
     * @param sentence 句子
     * @param englishTag 使用英语标签
     * @return CONLL格式分析结果
     */
    public static CoNLLSentence parse(String sentence, boolean englishTag) {
        IDependencyParser parser = new NeuralNetworkDependencyParser();
        if (englishTag)
            parser.enableDeprelTranslator(false);
        return parser.parse(sentence);
    }
    
    public static List<Term> getWordsInPath(CoNLLWord word) {
        return getWordsInPath(word, Integer.MAX_VALUE);
    }
    
    /**
     * 获得词语依存路径中的词语
     * @param word 词语
     * @param maxReturn 最大路径长度
     * @return 依存路径词语列表
     */
    public static List<Term> getWordsInPath(CoNLLWord word, int maxReturn) {
        List<Term> words = new ArrayList<Term>();
        if (word == CoNLLWord.ROOT || maxReturn < 1) return words;
        while (word != CoNLLWord.ROOT) {
            words.add(new Term(word.LEMMA, Nature.fromString(word.POSTAG)));
            word = word.HEAD;
            if (--maxReturn < 1) break;
        }
        return words;
    }
    
    /**
     * 获得词语依存路径
     * @param segResult 分词结果
     * @return 依存路径列表
     */
    public static List<List<Term>> getWordPaths(List<Term> segResult) {
        CoNLLWord[] wordArray = parse(segResult).getWordArray();
        List<List<Term>> wordPaths = new ArrayList<List<Term>>();
        for (CoNLLWord word : wordArray)
            wordPaths.add(getWordsInPath(word));
        return wordPaths;
    }
    
    /**
     * 获得词语依存路径
     * @param segResult 分词结果
     * @param maxReturn 最大路径长度
     * @return 依存路径列表
     */
    public static List<List<Term>> getWordPaths(List<Term> segResult, int maxReturn) {
        CoNLLWord[] wordArray = parse(segResult).getWordArray();
        List<List<Term>> wordPaths = new ArrayList<List<Term>>();
        for (CoNLLWord word : wordArray)
            wordPaths.add(getWordsInPath(word, maxReturn));
        return wordPaths;
    }
    
    /**
     * 获得词语依存路径
     * @param sentence 句子
     * @return 依存路径列表
     */
    public static List<List<Term>> getWordPaths(String sentence) {
        CoNLLWord[] wordArray = parse(sentence).getWordArray();
        List<List<Term>> wordPaths = new ArrayList<List<Term>>();
        for (CoNLLWord word : wordArray)
            wordPaths.add(getWordsInPath(word));
        return wordPaths;
    }
    
    /**
     * 获得词语依存路径
     * @param sentence 句子
     * @param maxReturn 最大路径长度
     * @return 依存路径列表
     */
    public static List<List<Term>> getWordPaths(String sentence, int maxReturn) {
        CoNLLWord[] wordArray = parse(sentence).getWordArray();
        List<List<Term>> wordPaths = new ArrayList<List<Term>>();
        for (CoNLLWord word : wordArray)
            wordPaths.add(getWordsInPath(word, maxReturn));
        return wordPaths;
    }
    
    /**
     * 获得词语的深度
     * @param word 词语
     * @return 词语在句法树中的深度
     */
    public static int calWordDepth(CoNLLWord word) {
        if (word == CoNLLWord.ROOT) return -1;
        int depth = 0;
        while(word.HEAD != CoNLLWord.ROOT) {
            depth++;
            word = word.HEAD;
        }
        return depth;
    }
    
    /**
     * 获取词语的深度
     * @param segResult 分词结果
     * @return 词语在句法树中的深度
     */
    public static Map<String, Integer> getWordsDepth(List<Term> segResult) {
        CoNLLSentence sentenceDep = parse(segResult);
        HashMap<String, Integer> wordsDepth = new HashMap<>();
        for (CoNLLWord wordDep : sentenceDep)
            wordsDepth.put(wordDep.LEMMA, calWordDepth(wordDep));
        return wordsDepth;
    }
    
    /**
     * 获取词语的深度
     * @param sentence 句子
     * @return 词语在句法树中的深度
     */
    public static Map<String, Integer> getWordsDepth(String sentence) {
        CoNLLSentence sentenceDep = parse(sentence);
        HashMap<String, Integer> wordsDepth = new HashMap<>();
        for (CoNLLWord wordDep : sentenceDep)
            wordsDepth.put(wordDep.LEMMA, calWordDepth(wordDep));
        return wordsDepth;
    }
    
    /**
     * 获取上层词语深度
     * @param segResult 分词结果
     * @param maxDepth 句法树最大深度 
     * @return 词语和对应深度
     */
    public static Map<String, Integer> getTopWordsDepth(List<Term> segResult, int maxDepth) {
        CoNLLSentence sentenceDep = parse(segResult);
        HashMap<String, Integer> wordsDepth = new HashMap<>();
        for (CoNLLWord wordDep : sentenceDep) {
            int depth = calWordDepth(wordDep);
            if (depth <= maxDepth)
                wordsDepth.put(wordDep.LEMMA, depth);
        }
        return wordsDepth;
    }
    
    /**
     * 获取上层词语深度
     * @param sentence 句子
     * @param maxDepth 句法树最大深度 
     * @return 词语和对应深度
     */
    public static Map<String, Integer> getTopWordsDepth(String sentence, int maxDepth) {
        CoNLLSentence sentenceDep = parse(sentence);
        HashMap<String, Integer> wordsDepth = new HashMap<>();
        for (CoNLLWord wordDep : sentenceDep) {
            int depth = calWordDepth(wordDep);
            if (depth <= maxDepth)
                wordsDepth.put(wordDep.LEMMA, depth);
        }
        return wordsDepth;
    }
    
    /**
     * 获取上层词语
     * @param segResult 分词结果
     * @param maxDepth 句法树最大深度 
     * @return 词语
     */
    public static List<String> getTopWords(List<Term> segResult, int maxDepth) {
        CoNLLSentence sentenceDep = parse(segResult);
        List<String> wordsDepth = new ArrayList<>();
        for (CoNLLWord wordDep : sentenceDep) {
            if (calWordDepth(wordDep) <= maxDepth)
                wordsDepth.add(wordDep.LEMMA);
        }
        return wordsDepth;
    }
    
    /**
     * 获取上层词语
     * @param sentence 句子
     * @param maxDepth 句法树最大深度 
     * @return 词语
     */
    public static List<String> getTopWords(String sentence, int maxDepth) {
        CoNLLSentence sentenceDep = parse(sentence);
        List<String> wordsDepth = new ArrayList<>();
        for (CoNLLWord wordDep : sentenceDep) {
            if (calWordDepth(wordDep) <= maxDepth)
                wordsDepth.add(wordDep.LEMMA);
        }
        return wordsDepth;
    }
}