org.apdplat.word.segmentation.PartOfSpeech Java Examples

The following examples show how to use org.apdplat.word.segmentation.PartOfSpeech. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: WordToken.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Maps the wrapped word's part of speech to an {@code NlpTag}.
 *
 * @return {@code NlpTag.X} when the part of speech is {@code PartOfSpeech.I} (unknown) or
 *         when its pos string equals "w" ignoring case (English); otherwise the tag that
 *         {@code PekingUniversityTagger.CHINESE_TAGGER} yields for the pos string
 */
@Override
public NlpTag getTag() {
    final PartOfSpeech partOfSpeech = word.getPartOfSpeech();
    // Unknown part of speech, or English text: both collapse to X.
    // The identity check is evaluated first, so getPos() is only reached for other values.
    if (partOfSpeech == PartOfSpeech.I || partOfSpeech.getPos().equalsIgnoreCase("w")) {
        return NlpTag.X;
    }
    return PekingUniversityTagger.CHINESE_TAGGER.getTag(partOfSpeech.getPos());
}
 
Example #2
Source File: WordToken.java    From jstarcraft-nlp with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the pos string of the wrapped word's part of speech.
 *
 * @return the value of {@code getPos()} on the word's {@code PartOfSpeech}
 */
@Override
public String getNature() {
    return word.getPartOfSpeech().getPos();
}
 
Example #3
Source File: PersonName.java    From word with Apache License 2.0 4 votes vote down vote up
/**
 * Scans a segmented word list and merges a surname with the following one or two
 * single-character words into a single word tagged "nr" when the combined text passes
 * {@code is(...)}.
 *
 * <p>Lists with fewer than two words are returned as-is (same instance). Otherwise a new
 * list is built; every entry in it is a fresh {@code Word} created from text only.
 *
 * @param words the segmented words to scan
 * @return {@code words} itself when it has fewer than two elements, else a new list
 */
private static List<Word> recognizePersonName(List<Word> words){
    final int total = words.size();
    if(total < 2){
        return words;
    }
    final List<Word> merged = new ArrayList<>();
    // Index of the first word that has not been emitted yet.
    int cursor = 0;
    // Each pass needs a word at cursor and one after it.
    while(cursor < total-1){
        String follower = words.get(cursor+1).getText();
        if(follower.length() > 1){
            // A multi-character follower is never treated as part of a name:
            // emit both words unchanged and move past them.
            merged.add(new Word(words.get(cursor).getText()));
            merged.add(new Word(words.get(cursor+1).getText()));
            cursor += 2;
            continue;
        }
        String leader = words.get(cursor).getText();
        if(isSurname(leader)){
            // Optionally extend the candidate with a third, single-character word.
            String tail = "";
            if(cursor+2 < total && words.get(cursor+2).getText().length()==1){
                tail = words.get(cursor+2).getText();
            }
            String candidate = leader+follower+tail;
            if(is(candidate)){
                if(LOGGER.isDebugEnabled()) {
                    LOGGER.debug("识别到人名:" + candidate);
                }
                Word person = new Word(candidate);
                // Part-of-speech definitions: see the word.conf setting
                // part.of.speech.des.path=classpath:part_of_speech_des.txt
                person.setPartOfSpeech(PartOfSpeech.valueOf("nr"));
                merged.add(person);
                // Two words consumed, or three when the tail was used.
                cursor += "".equals(tail) ? 2 : 3;
                continue;
            }
        }
        // Not a name: emit the leading word alone and re-examine from the follower.
        merged.add(new Word(leader));
        cursor++;
    }
    // Exactly one word left over: it has no follower to pair with, so emit it unchanged.
    if(cursor == total-1){
        merged.add(new Word(words.get(cursor).getText()));
    }
    return merged;
}
 
Example #4
Source File: PartOfSpeechTagging.java    From word with Apache License 2.0 4 votes vote down vote up
/**
 * Assigns a part of speech to every word in the list that does not already have one.
 *
 * <p>The tag is looked up in {@code GENERIC_TRIE} by word text. When the lookup returns
 * {@code null}, a sequence of {@code RecognitionTool} checks runs; the checks are independent,
 * so a later match overrides an earlier one:
 * <ul>
 *   <li>English: "w"</li>
 *   <li>number: "m"</li>
 *   <li>Chinese numeral: "mh"</li>
 *   <li>decimal: "mx"; fraction: "mf"</li>
 *   <li>quantifier: "mf" (per-mille/percent), "tq" (time), "tdq" (date), else "mq"</li>
 * </ul>
 * Words that already carry a part of speech are skipped.
 *
 * <p>Separator and percent checks accept both the ASCII character and its full-width form.
 * The full-width forms are written as Unicode escapes so they cannot be silently collapsed
 * to their ASCII look-alikes by a re-encoding of this file.
 *
 * @param words the words to tag; each untagged word is modified in place
 */
public static void process(List<Word> words){
    for(Word word : words){
        if(word.getPartOfSpeech()!=null){
            if(LOGGER.isDebugEnabled()) {
                LOGGER.debug("忽略已经标注过的词:{}", word);
            }
            continue;
        }
        String wordText = word.getText();
        String pos = GENERIC_TRIE.get(wordText);
        if(pos == null){
            // English
            if(RecognitionTool.isEnglish(wordText)){
                pos = "w";
            }
            // Number
            if(RecognitionTool.isNumber(wordText)){
                pos = "m";
            }
            // Chinese numeral
            if(RecognitionTool.isChineseNumber(wordText)){
                pos = "mh";
            }
            // Decimals and fractions
            if(RecognitionTool.isFraction(wordText)){
                // '.', full-width '.' (U+FF0E) or middle dot => decimal
                if(wordText.contains(".")||wordText.contains("\uFF0E")||wordText.contains("·")){
                    pos = "mx";
                }
                // '/' or full-width '/' (U+FF0F) => fraction; checked last, so it wins over "mx"
                if(wordText.contains("/")||wordText.contains("\uFF0F")){
                    pos = "mf";
                }
            }
            // Quantifiers
            if(RecognitionTool.isQuantifier(wordText)){
                // Per-mille, '%' or full-width '%' (U+FF05) => fraction
                if(wordText.contains("‰")||wordText.contains("%")||wordText.contains("\uFF05")){
                    pos = "mf";
                }
                // Time quantifier
                else if(wordText.contains("时")||wordText.contains("分")||wordText.contains("秒")){
                    pos = "tq";
                }
                // Date quantifier
                else if(wordText.contains("年")||wordText.contains("月")||wordText.contains("日")
                        ||wordText.contains("天")||wordText.contains("号")){
                    pos = "tdq";
                }
                // Plain quantifier
                else{
                    pos = "mq";
                }
            }
        }
        // NOTE(review): pos is still null here when nothing above matched; it is handed to
        // PartOfSpeech.valueOf unchanged - confirm valueOf tolerates a null argument.
        word.setPartOfSpeech(PartOfSpeech.valueOf(pos));
    }
}