Java Code Examples for org.wltea.analyzer.core.Lexeme#getEndPosition()

The following examples show how to use org.wltea.analyzer.core.Lexeme#getEndPosition() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: IkTokenizer.java From jstarcraft-nlp with Apache License 2.0

6 votes

/**
 * Emits the next lexeme supplied by {@code _IKImplement} as a token.
 *
 * <p>All attributes are cleared first. When a lexeme is available its text,
 * length, begin/end offsets and type string are copied into the term, offset
 * and type attributes, and its end position is stored in {@code endPosition}.
 *
 * @return {@code true} if a lexeme was available and the attributes were
 *         populated; {@code false} once {@code _IKImplement.next()} returns
 *         {@code null}
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
    // Reset every token attribute before filling in the next token.
    clearAttributes();
    Lexeme lexeme = _IKImplement.next();
    if (lexeme == null) {
        // Nothing left to emit: report that token output is finished.
        return false;
    }
    // Translate the lexeme into attributes, starting with the token text.
    termAttribute.append(lexeme.getLexemeText());
    // Token length.
    termAttribute.setLength(lexeme.getLength());
    // Token offsets.
    offsetAttribute.setOffset(lexeme.getBeginPosition(), lexeme.getEndPosition());
    // Remember the last position reached by segmentation.
    endPosition = lexeme.getEndPosition();
    // Token category.
    typeAttribute.setType(lexeme.getLexemeTypeString());
    // A token was produced; tell the caller more may follow.
    return true;
}

Example 2

Source File: IKTokenizer.java From es-ik with Apache License 2.0

6 votes

/**
 * Moves to the next lexeme returned by {@code _IKImplement} and exposes it
 * through the term, offset and type attributes.
 *
 * <p>The lexeme's end position is also stored in {@code endPosition}.
 *
 * @return {@code true} when a lexeme was converted into a token,
 *         {@code false} when {@code _IKImplement.next()} yielded {@code null}
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
    // Wipe all token attributes left over from the previous call.
    clearAttributes();
    Lexeme current = _IKImplement.next();
    final boolean hasToken = current != null;
    if (hasToken) {
        // Convert the lexeme into attributes.
        // Token text, followed by the token length.
        termAtt.append(current.getLexemeText());
        termAtt.setLength(current.getLength());
        // Token offsets.
        offsetAtt.setOffset(current.getBeginPosition(), current.getEndPosition());
        // Record the final position reached by segmentation.
        endPosition = current.getEndPosition();
        // Token category.
        typeAtt.setType(current.getLexemeTypeString());
    }
    // true: another token may follow; false: token output is complete.
    return hasToken;
}

Example 3

Source File: IKTokenizer.java From IKAnalyzer with Apache License 2.0

6 votes

/**
 * {@inheritDoc}
 *
 * <p>This implementation clears all attributes, pulls the next lexeme from
 * {@code _IKImplement}, copies its text, length and begin/end offsets into
 * the term and offset attributes, and stores its end position in
 * {@code finalOffset}.
 *
 * @return {@code true} if a lexeme was emitted, {@code false} when
 *         {@code _IKImplement.next()} returned {@code null}
 */
@Override
public boolean incrementToken() throws IOException {
	// Clear every token attribute before producing the next token.
	clearAttributes();
	Lexeme lexeme = _IKImplement.next();
	if (lexeme == null) {
		// No lexeme available: signal that token output is finished.
		return false;
	}
	// Turn the lexeme into attributes: token text first.
	termAtt.append(lexeme.getLexemeText());
	// Token length.
	termAtt.setLength(lexeme.getLength());
	// Token offsets.
	offsetAtt.setOffset(lexeme.getBeginPosition(), lexeme.getEndPosition());
	// Keep track of the last position reached by segmentation.
	finalOffset = lexeme.getEndPosition();
	// A token was emitted; tell the caller another may follow.
	return true;
}

Example 4

Source File: IKTokenizer.java From ik-analyzer with GNU General Public License v3.0

6 votes

/**
 * Produces the next token from the lexeme stream of {@code ikimplement}.
 *
 * <p>After clearing the attributes, the next lexeme (if any) supplies the
 * token text, length, begin/end offsets and type string; its end position
 * is saved in {@code endPosition}.
 *
 * @return {@code true} if a token was populated, {@code false} when
 *         {@code ikimplement.next()} returned {@code null}
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
    // Start from a clean slate: clear all token attributes.
    clearAttributes();
    Lexeme lexeme = ikimplement.next();
    if (null == lexeme) {
        // Segmenter has nothing more: token output is complete.
        return false;
    }

    // Map the lexeme onto the attributes.
    // Token text and token length.
    termAtt.append(lexeme.getLexemeText());
    termAtt.setLength(lexeme.getLength());

    // Token offsets.
    offsetAtt.setOffset(lexeme.getBeginPosition(), lexeme.getEndPosition());

    // Last position reached by segmentation.
    endPosition = lexeme.getEndPosition();

    // Token category.
    typeAtt.setType(lexeme.getLexemeTypeString());

    // Signal that a token was emitted and another may follow.
    return true;
}

Example 5

Source File: IKTokenizer.java From Elasticsearch-Tutorial-zh-CN with GNU General Public License v3.0

5 votes

/**
 * Emits the next lexeme from {@code _IKImplement} as a token.
 *
 * <p>Attributes are cleared and {@code skippedPositions} is reset to zero
 * before the lexeme is fetched. For a non-null lexeme the position increment
 * is set to {@code skippedPositions + 1}, the text, length and type string
 * are copied over, the begin/end offsets are passed through
 * {@code correctOffset} before being stored, and the lexeme's raw end
 * position is kept in {@code endPosition}.
 *
 * @return {@code true} if a token was populated, {@code false} when
 *         {@code _IKImplement.next()} returned {@code null}
 * @throws IOException declared by the overridden method
 */
@Override
public boolean incrementToken() throws IOException {
    // Clear all token attributes and reset the skipped-position counter.
    clearAttributes();
    skippedPositions = 0;

    Lexeme lexeme = _IKImplement.next();
    if (lexeme == null) {
        // No further lexeme: report that token output is finished.
        return false;
    }

    posIncrAtt.setPositionIncrement(skippedPositions + 1);

    // Convert the lexeme into attributes: token text, then token length.
    termAtt.append(lexeme.getLexemeText());
    termAtt.setLength(lexeme.getLength());

    // Token offsets, each routed through correctOffset.
    offsetAtt.setOffset(
            correctOffset(lexeme.getBeginPosition()),
            correctOffset(lexeme.getEndPosition()));

    // Last position reached by segmentation (stored without correctOffset).
    endPosition = lexeme.getEndPosition();
    // Token category.
    typeAtt.setType(lexeme.getLexemeTypeString());
    // A token was emitted; tell the caller another may follow.
    return true;
}