java source code of IKArbitrator

/**
 * IK 中文分词  版本 5.0
 * IK Analyzer release 5.0
 * <p>
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * <p>
 * 源代码由林良益([email protected])提供
 * 版权声明 2012，乌龙茶工作室
 * provided by Linliangyi and copyright 2012 by Oolong studio
 */
package casia.isiteam.zdr.wltea.analyzer.core;

import java.util.Stack;
import java.util.TreeSet;

/**
 * IK分词歧义裁决器
 */
class IKArbitrator {

    IKArbitrator() {

    }

    /**
     * 分词歧义处理
     //	 * @param orgLexemes
     * @param useSmart
     */
    void process(AnalyzeContext context, boolean useSmart) {
        QuickSortSet orgLexemes = context.getOrgLexemes();
        Lexeme orgLexeme = orgLexemes.pollFirst();

        LexemePath crossPath = new LexemePath();
        while (orgLexeme != null) {
            if (!crossPath.addCrossLexeme(orgLexeme)) {
                //找到与crossPath不相交的下一个crossPath
                if (crossPath.size() == 1 || !useSmart) {
                    //crossPath没有歧义 或者 不做歧义处理
                    //直接输出当前crossPath
                    context.addLexemePath(crossPath);
                } else {
                    //对当前的crossPath进行歧义处理
                    QuickSortSet.Cell headCell = crossPath.getHead();
                    LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength());
                    //输出歧义处理结果judgeResult
                    context.addLexemePath(judgeResult);
                }

                //把orgLexeme加入新的crossPath中
                crossPath = new LexemePath();
                crossPath.addCrossLexeme(orgLexeme);
            }
            orgLexeme = orgLexemes.pollFirst();
        }


        //处理最后的path
        if (crossPath.size() == 1 || !useSmart) {
            //crossPath没有歧义 或者 不做歧义处理
            //直接输出当前crossPath
            context.addLexemePath(crossPath);
        } else {
            //对当前的crossPath进行歧义处理
            QuickSortSet.Cell headCell = crossPath.getHead();
            LexemePath judgeResult = this.judge(headCell, crossPath.getPathLength());
            //输出歧义处理结果judgeResult
            context.addLexemePath(judgeResult);
        }
    }

    /**
     * 歧义识别
     * @param lexemeCell 歧义路径链表头
     * @param fullTextLength 歧义路径文本长度
     * @return
     */
    private LexemePath judge(QuickSortSet.Cell lexemeCell, int fullTextLength) {
        //候选路径集合
        TreeSet<LexemePath> pathOptions = new TreeSet<LexemePath>();
        //候选结果路径
        LexemePath option = new LexemePath();

        //对crossPath进行一次遍历,同时返回本次遍历中有冲突的Lexeme栈
        Stack<QuickSortSet.Cell> lexemeStack = this.forwardPath(lexemeCell, option);

        //当前词元链并非最理想的，加入候选路径集合
        pathOptions.add(option.copy());

        //存在歧义词，处理
        QuickSortSet.Cell c = null;
        while (!lexemeStack.isEmpty()) {
            c = lexemeStack.pop();
            //回滚词元链
            this.backPath(c.getLexeme(), option);
            //从歧义词位置开始，递归，生成可选方案
            this.forwardPath(c, option);
            pathOptions.add(option.copy());
        }

        //返回集合中的最优方案
        return pathOptions.first();

    }

    /**
     * 向前遍历，添加词元，构造一个无歧义词元组合
     //	 * @param LexemePath path
     * @return
     */
    private Stack<QuickSortSet.Cell> forwardPath(QuickSortSet.Cell lexemeCell, LexemePath option) {
        //发生冲突的Lexeme栈
        Stack<QuickSortSet.Cell> conflictStack = new Stack<QuickSortSet.Cell>();
        QuickSortSet.Cell c = lexemeCell;
        //迭代遍历Lexeme链表
        while (c != null && c.getLexeme() != null) {
            if (!option.addNotCrossLexeme(c.getLexeme())) {
                //词元交叉，添加失败则加入lexemeStack栈
                conflictStack.push(c);
            }
            c = c.getNext();
        }
        return conflictStack;
    }

    /**
     * 回滚词元链，直到它能够接受指定的词元
     //	 * @param lexeme
     * @param l
     */
    private void backPath(Lexeme l, LexemePath option) {
        while (option.checkCross(l)) {
            option.removeTail();
        }

    }

}