Java Code Examples for org.ansj.domain.Term#termNatures()

The following examples show how to use org.ansj.domain.Term#termNatures(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: ForeignPersonRecognition.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Scans the term array for runs of consecutive candidate name terms and, when a run of
 * more than one term is closed by a non-candidate term, hands the run to
 * {@code TermUtil.insertTerm} with the {@code TermNatures.NR} nature.
 *
 * A term is a candidate when its natures are {@code TermNatures.NR} or
 * {@code TermNatures.NW}, or when its name is a single character; it is only collected
 * if {@code validate(name)} accepts it.
 *
 * @param terms the term array to scan; {@code null} slots are skipped
 */
@Override
public void recognition(Term[] terms) {
    this.terms = terms;
    reset();

    for (int idx = 0; idx < terms.length; idx++) {
        final Term current = terms[idx];
        if (current == null) {
            continue;
        }

        // While nothing has been collected yet, skip terms that look like a person-name
        // prefix/suffix and single characters listed in ISNOTFIRST.
        if (tempList.isEmpty()) {
            if (current.termNatures().personAttr.end > 10) {
                continue;
            }
            final String word = current.getName();
            if (word.length() == 1 && ISNOTFIRST.contains(word.charAt(0))) {
                continue;
            }
        }

        final String name = current.getName();
        final boolean candidate = current.termNatures() == TermNatures.NR
                        || current.termNatures() == TermNatures.NW
                        || name.length() == 1;

        if (candidate) {
            if (validate(name)) {
                tempList.add(current);
            }
            continue;
        }

        // A non-candidate term closes the current run.
        final int collected = tempList.size();
        if (collected == 1) {
            reset();
        } else if (collected > 1) {
            TermUtil.insertTerm(terms, tempList, TermNatures.NR);
            reset();
        }
    }
}
 
Example 2
Source File: ForeignPersonRecognition.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the stored term array and builds one new term (via {@code makeNewTerm()}) for
 * every run of more than one collected candidate term that is closed by a
 * non-candidate term.
 *
 * @return the newly built terms, in the order their runs were closed
 */
public List<Term> getNewTerms() {
    final LinkedList<Term> found = new LinkedList<>();
    reset();

    for (int idx = 0; idx < terms.length; idx++) {
        final Term current = terms[idx];
        if (current == null) {
            continue;
        }

        // While nothing has been collected yet, skip terms that look like a person-name
        // prefix/suffix and single characters listed in ISNOTFIRST.
        if (tempList.isEmpty()) {
            if (current.termNatures().personAttr.end > 10) {
                continue;
            }
            final String word = current.getName();
            if (word.length() == 1 && ISNOTFIRST.contains(word.charAt(0))) {
                continue;
            }
        }

        final String name = current.getName();
        final boolean candidate = current.termNatures() == TermNatures.NR
                        || current.termNatures() == TermNatures.NW
                        || name.length() == 1;

        if (candidate) {
            if (validate(name)) {
                tempList.add(current);
            }
            continue;
        }

        // A non-candidate term closes the current run.
        final int collected = tempList.size();
        if (collected == 1) {
            reset();
        } else if (collected > 1) {
            found.add(makeNewTerm());
            reset();
        }
    }
    return found;
}
 
Example 3
Source File: Graph.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a term to the graph.
 *
 * Before inserting, records whether any term seen so far carries number information
 * ({@code numAttr.numFreq > 0}) or a person flag ({@code personAttr.flag}); once a
 * flag is set it is never cleared here.
 *
 * @param term the term to insert; it is placed with {@code InsertTermType.REPLACE}
 */
public void addTerm(Term term) {
    // Only inspect the term while the corresponding flag is still unset.
    if (!hasNum) {
        hasNum = term.termNatures().numAttr.numFreq > 0;
    }
    if (!hasPerson) {
        hasPerson = term.termNatures().personAttr.flag;
    }

    TermUtil.insertTerm(terms, term, InsertTermType.REPLACE);
}
 
Example 4
Source File: TermUtil.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Merges two terms into one brand-new term.
 *
 * The new term's name is the concatenation of both names, its offset is that of
 * {@code from}, and its number attribute is copied from {@code from}.
 *
 * @param from        the leading term; supplies the offset and the number attribute
 * @param to          the trailing term; the new term is linked to {@code to.to()}
 * @param termNatures the natures given to the new term
 * @return the merged term
 */
public static Term makeNewTermNum(Term from, Term to, TermNatures termNatures) {
    final String mergedName = from.getName() + to.getName();
    final Term merged = new Term(mergedName, from.getOffe(), termNatures);

    // NOTE(review): this writes numAttr on the natures object returned by the new term;
    // if that is the same instance as the termNatures argument, the caller's object is
    // modified - confirm this is intended.
    merged.termNatures().numAttr = from.termNatures().numAttr;

    TermUtil.termLink(merged, to.to());
    // NOTE(review): links from merged.from(), i.e. the predecessor of the freshly
    // created term, not from.from() - confirm against Term/TermUtil.
    TermUtil.termLink(merged.from(), merged);
    return merged;
}
 
Example 5
Source File: MathUtil.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the cumulative score of stepping from one term to the term that follows it.
 *
 * If the adjusted frequency of {@code from} is negative, {@code from}'s own score is
 * increased by {@code MAX_FREQUENCE}, stored back on {@code from}, and returned.
 *
 * @param from
 *            the preceding term
 * @param to
 *            the following term
 * @param relationMap
 *            optional extra pair weights keyed by {@code fromName + TAB + toName};
 *            may be {@code null}
 * @return the score of {@code from} plus the cost of the transition
 */
public static double compuScore(Term from, Term to, Map<String, Double> relationMap) {
    final double frequency = from.termNatures().allFreq + 1;

    if (frequency < 0) {
        final double penalised = from.score() + MAX_FREQUENCE;
        from.score(penalised);
        return penalised;
    }

    double pairFreq = NgramLibrary.getTwoWordFreq(from, to);

    // Add the caller-supplied weight for this exact pair, if there is one.
    final Double extra = (relationMap == null) ? null : relationMap.get(from.getName() + TAB + to.getName());
    if (extra != null) {
        pairFreq += extra;
    }

    // Smoothed mix of the single-term part and the term-pair part.
    final double unigramPart = D_SMOOTHING_PARA * frequency / (MAX_FREQUENCE + 80000);
    final double bigramPart = (1 - D_SMOOTHING_PARA) * ((1 - D_TEMP) * pairFreq / frequency + D_TEMP);

    double cost = -Math.log(unigramPart + bigramPart);
    if (cost < 0) {
        cost += frequency;
    }
    return from.score() + cost;
}
 
Example 6
Source File: NameFix.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Person-name disambiguation over a segmented term array.
 *
 * Pass 1: when a two-character term whose natures are {@code TermNatures.NR} is
 * followed (two slots later) by a term whose {@code personAttr.split} is positive, the
 * first character of that following term is moved onto the name and the remainder
 * becomes a new term.
 *
 * Pass 2 (rule-based): when a single-character term that {@code WordAlert.CharCover}
 * maps to the middle dot sits between two terms whose nature string starts with
 * {@code "nr"}, the three are joined into the preceding term.
 *
 * The array is modified in place; slots of absorbed terms are set to {@code null}.
 *
 * @param terms   the term array to fix up; presumably indexed by character offset
 *                (the code reaches the next term via {@code i + 2}) - TODO confirm
 * @param forests passed to {@code NatureRecognition} to look up the natures of the
 *                remainder term created in pass 1
 */
public static void nameAmbiguity(Term[] terms, Forest... forests) {
    Term from = null;
    Term term = null;
    Term next = null;
    for (int i = 0; i < terms.length - 1; i++) {
        term = terms[i];
        if (term != null && term.termNatures() == TermNatures.NR && term.getName().length() == 2) {
            // NOTE(review): assumes slot i + 2 exists and is non-null for a
            // two-character term at slot i - confirm against how the array is built.
            next = terms[i + 2];
            if (next.termNatures().personAttr.split > 0) {
                // Move the first character of the following term onto the name.
                term.setName(term.getName() + next.getName().charAt(0));
                terms[i + 2] = null;

                // The rest of the following term becomes a new term one slot later,
                // with natures looked up afresh, and is re-linked into the chain.
                String name = next.getName().substring(1);
                terms[i + 3] = new Term(name, next.getOffe() + 1,
                                new NatureRecognition(forests).getTermNatures(name));
                TermUtil.termLink(term, terms[i + 3]);
                TermUtil.termLink(terms[i + 3], next.to());
            }
        }
    }

    // Foreign person-name correction
    for (int i = 0; i < terms.length; i++) {
        term = terms[i];
        if (term != null && term.getName().length() == 1 && i > 0
                        && WordAlert.CharCover(term.getName().charAt(0)) == '·') {
            from = term.from();
            next = term.to();

            // Join "name · name" into the preceding term when both neighbours are
            // person-name natures, then clear the separator slot and the slot after it.
            if (from.natrue().natureStr.startsWith("nr") && next.natrue().natureStr.startsWith("nr")) {
                from.setName(from.getName() + term.getName() + next.getName());
                TermUtil.termLink(from, next.to());
                terms[i] = null;
                terms[i + 1] = null;
            }
        }
    }

}
 
Example 7
Source File: NumRecognition.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Merges adjacent number terms (number + number) into a single term.
 *
 * Three things happen while scanning the array:
 * - a period term (ASCII {@code "."} or the full-width full stop U+FF0E) whose
 *   neighbours are both numeric is folded with them into one decimal term;
 * - a numeric term absorbs every numeric term that directly follows it;
 * - when {@code MyStaticValue.isQuantifierRecognition} is set, the term following the
 *   number run is absorbed as well if its {@code numEndFreq} is positive.
 *
 * The array is modified in place; slots of absorbed terms are set to {@code null}.
 *
 * @param terms the term array to process; the last slot is never used as a start
 *              position, and {@code null} slots are skipped
 */
@Override
public void recognition(Term[] terms) {
    int length = terms.length - 1;
    Term from = null;
    Term to = null;
    Term temp = null;
    for (int i = 0; i < length; i++) {
        if (terms[i] == null) {
            continue;
        } else if (".".equals(terms[i].getName()) || "\uFF0E".equals(terms[i].getName())) {
            // The second operand used to repeat the ASCII "." check and was dead code;
            // it is the full-width full stop, written as an escape so it survives
            // re-encoding of the source file.
            // A period between two numbers gets special handling: fold all three into
            // the preceding term, always joined with an ASCII ".".
            to = terms[i].to();
            from = terms[i].from();
            if (from.termNatures().numAttr.flag && to.termNatures().numAttr.flag) {
                from.setName(from.getName() + "." + to.getName());
                TermUtil.termLink(from, to.to());
                terms[to.getOffe()] = null;
                terms[i] = null;
                // Step back so the next iteration re-examines the merged term.
                i = from.getOffe() - 1;
            }
            continue;
        } else if (!terms[i].termNatures().numAttr.flag) {
            continue;
        }

        temp = terms[i];
        // Absorb every directly following numeric term.
        while ((temp = temp.to()).termNatures().numAttr.flag) {
            terms[i].setName(terms[i].getName() + temp.getName());
        }
        // If the following term can end a number, absorb it too.
        if (MyStaticValue.isQuantifierRecognition && temp.termNatures().numAttr.numEndFreq > 0) {
            terms[i].setName(terms[i].getName() + temp.getName());
            temp = temp.to();
        }

        // A different successor means terms[i] was extended.
        if (terms[i].to() != temp) {
            TermUtil.termLink(terms[i], temp);
            // Clear the slots of the terms that were absorbed.
            for (int j = i + 1; j < temp.getOffe(); j++) {
                terms[j] = null;
            }
            i = temp.getOffe() - 1;
        }
    }

}
 
Example 8
Source File: ForeignPersonRecognition.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Scans the term array for runs of consecutive candidate name terms and reports each
 * run of more than one term, once closed by a non-candidate term, as a new word whose
 * text is the concatenation of the run's names and whose nature is {@code Nature.NRF}.
 *
 * @param terms the term array to scan; {@code null} slots are skipped
 * @return the new words found, in the order their runs were closed
 */
public List<NewWord> getNewWords(Term[] terms) {
    this.terms = terms;
    final List<NewWord> discovered = new ArrayList<>();
    reset();

    for (int idx = 0; idx < terms.length; idx++) {
        final Term current = terms[idx];
        if (current == null) {
            continue;
        }

        // While nothing has been collected yet, skip terms that look like a person-name
        // prefix/suffix and single characters listed in ISNOTFIRST.
        if (tempList.isEmpty()) {
            if (current.termNatures().personAttr.end > 10) {
                continue;
            }
            final String word = current.getName();
            if (word.length() == 1 && ISNOTFIRST.contains(word.charAt(0))) {
                continue;
            }
        }

        final String name = current.getName();
        final boolean candidate = current.termNatures() == TermNatures.NR
                        || current.termNatures() == TermNatures.NW
                        || name.length() == 1;

        if (candidate) {
            if (validate(name)) {
                tempList.add(current);
            }
            continue;
        }

        // A non-candidate term closes the current run.
        final int collected = tempList.size();
        if (collected == 1) {
            reset();
        } else if (collected > 1) {
            final StringBuilder joined = new StringBuilder();
            for (Term part : tempList) {
                joined.append(part.getName());
            }
            discovered.add(new NewWord(joined.toString(), Nature.NRF));
            reset();
        }
    }
    return discovered;
}
 
Example 9
Source File: MathUtil.java    From deeplearning4j with Apache License 2.0 2 votes vote down vote up
/**
 * Computes a score for a pair of terms as the sum of their overall frequencies
 * ({@code allFreq} of each term's natures).
 *
 * NOTE(review): the original comment mentioned part of speech, word frequency and word
 * length, but only {@code allFreq} is used here.
 *
 * @param from the first term
 * @param term the second term
 * @return {@code from}'s {@code allFreq} plus {@code term}'s {@code allFreq}
 */
public static double compuScoreFreq(Term from, Term term) {
    return from.termNatures().allFreq + term.termNatures().allFreq;
}