Java Code Examples for org.wltea.analyzer.core.IKSegmenter#next()

The following examples show how to use org.wltea.analyzer.core.IKSegmenter#next(). You can vote up the examples you find helpful or vote down those you don't. To see each example in context, follow the link to the original project or source file above it, or browse related API usage via the sidebar.
Example 1
Source File: IKAnalyzer.java    From hugegraph with Apache License 2.0 6 votes vote down vote up
@Override
public Set<String> segment(String text) {
    // Tokenize with IK and collect the unique tokens in first-seen order.
    Set<String> tokens = InsertionOrderUtil.newSet();
    IKSegmenter segmenter = new IKSegmenter(new StringReader(text),
                                            this.smartSegMode);
    try {
        for (Lexeme lexeme = segmenter.next(); lexeme != null;
                lexeme = segmenter.next()) {
            tokens.add(lexeme.getLexemeText());
        }
    } catch (Exception e) {
        throw new HugeException("IKAnalyzer segment text '%s' failed",
                                e, text);
    }
    return tokens;
}
 
Example 2
Source File: TokenizerAnalyzerUtils.java    From JewelCrawler with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Tokenizes {@code input} with the IK analyzer in smart mode, drops stop
 * words, and returns the remaining tokens separated by blanks.
 *
 * @param input text to tokenize
 * @return blank-separated tokens (each followed by a trailing blank); on an
 *         I/O error, whatever was accumulated before the failure
 */
public static String getAnalyzerResult(String input) {
    StringReader sr = new StringReader(input);
    IKSegmenter ik = new IKSegmenter(sr, true); // true = smart segmentation mode
    Lexeme lex = null;
    List<String> stopWordsList = getStopWordsList();
    StringBuilder stringBuilder = new StringBuilder();

    try {
        while ((lex = ik.next()) != null) {
            if (stopWordsList.contains(lex.getLexemeText())) {
                continue; // skip stop words
            }
            // Chain appends rather than concatenating inside append(),
            // which would allocate a throwaway String per token.
            stringBuilder.append(lex.getLexemeText()).append(Constants.BLANKSPACE);
        }
    } catch (IOException e) {
        // StringReader should never throw; report and fall through to
        // return the partial result, preserving best-effort behavior.
        e.printStackTrace();
        System.out.println("failed to parse input content");
    }
    return stringBuilder.toString();
}
 
Example 3
Source File: ChineseTokenizer.java    From RDMP1 with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Segments {@code content} with the IK analyzer and counts how often each
 * term occurs.
 *
 * @param content text to segment
 * @return map of term to occurrence count, in first-seen (insertion) order
 */
public static Map<String, Long> segStr(String content){
    // Feed the text to the segmenter.
    Reader input = new StringReader(content);
    // Smart segmentation ENABLED (second argument is true); the original
    // comment claimed it was off, contradicting the code. This flag has a
    // large effect on segmentation granularity.
    IKSegmenter iks = new IKSegmenter(input, true);
    Lexeme lexeme = null;
    // LinkedHashMap keeps terms in the order they first appear.
    Map<String, Long> words = new LinkedHashMap<String, Long>();
    try {
        while ((lexeme = iks.next()) != null) {
            String word = lexeme.getLexemeText();
            // One lookup instead of the containsKey/get/put triple.
            Long count = words.get(word);
            words.put(word, count == null ? 1L : count + 1L);
        }
    }catch(IOException e) {
        e.printStackTrace();
    }
    return words;
}
 
Example 4
Source File: StrUtils.java    From Lottery with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Segments the given keyword string and joins the resulting terms with commas.
 *
 * @param keyword source text to segment
 * @param smart whether to use smart (coarse-grained) segmentation
 * @return the segmented terms joined by ',' (empty string when no terms)
 */
public static String getKeywords(String keyword, boolean smart) {
	StringReader reader = new StringReader(keyword);
	IKSegmenter iks = new IKSegmenter(reader, smart);
	StringBuilder buffer = new StringBuilder();
	try {
		Lexeme lexeme;
		while ((lexeme = iks.next()) != null) {
			buffer.append(lexeme.getLexemeText()).append(',');
		}
	} catch (IOException e) {
		// An in-memory StringReader cannot fail here; surface the
		// impossible case instead of silently swallowing it.
		throw new IllegalStateException("Unexpected I/O error while segmenting", e);
	}
	// Drop the trailing comma.
	if (buffer.length() > 0) {
		buffer.setLength(buffer.length() - 1);
	}
	return buffer.toString();
}
 
Example 5
Source File: SWMCQueryBuilder.java    From IKAnalyzer with Apache License 2.0 5 votes vote down vote up
/**
 * Segments the keyword string with IK in smart mode and returns the
 * resulting lexemes as a list.
 *
 * @param keywords text to segment
 * @return lexemes produced by the segmenter, in emission order
 */
private static List<Lexeme> doAnalyze(String keywords){
	List<Lexeme> result = new ArrayList<Lexeme>();
	IKSegmenter segmenter = new IKSegmenter(new StringReader(keywords), true);
	try {
		for (Lexeme lexeme = segmenter.next(); lexeme != null; lexeme = segmenter.next()) {
			result.add(lexeme);
		}
	} catch (IOException ioe) {
		ioe.printStackTrace();
	}
	return result;
}
 
Example 6
Source File: SWMCQueryBuilder.java    From ik-analyzer with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Runs IK smart segmentation over the keyword string and collects every
 * lexeme it emits.
 *
 * @param keywords text to segment
 * @return the lexemes in the order the segmenter produced them
 */
private static List<Lexeme> doAnalyze(String keywords) {
    List<Lexeme> collected = new ArrayList<Lexeme>();
    IKSegmenter segmenter = new IKSegmenter(new StringReader(keywords), true);
    try {
        Lexeme current = segmenter.next();
        while (current != null) {
            collected.add(current);
            current = segmenter.next();
        }
    } catch (IOException e) {
        LOG.error("io error.", e);
    }
    return collected;
}