Java Code Examples for com.hankcs.hanlp.HanLP

The following examples show how to use com.hankcs.hanlp.HanLP. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: rebuild   Source File: SignUpControll.java    License: GNU General Public License v3.0 7 votes vote down vote up
@RequestMapping("checkout-name")
public void checkoutName(HttpServletRequest request, HttpServletResponse response) throws IOException {
	// Derive a login-name suggestion from the submitted full name.
	String fullName = getParameterNotNull(request, "fullName");

	// Keep only ASCII letters, digits and CJK characters, then romanize to pinyin.
	String loginName = HanLP.convertToPinyinString(
			fullName.replaceAll("[^a-zA-Z0-9\u4e00-\u9fa5]", ""), "", false);
	if (loginName.length() > 20) {
		loginName = loginName.substring(0, 20);
	}

	// Blacklisted names: reply success without suggesting a name.
	if (BlackList.isBlack(loginName)) {
		writeSuccess(response);
		return;
	}

	// Append random digits until the name is unique (bounded number of attempts).
	for (int i = 0; i < 100; i++) {
		if (!Application.getUserStore().existsName(loginName)) {
			break;
		}
		loginName += RandomUtils.nextInt(99);
	}

	writeSuccess(response, loginName.toLowerCase());
}
 
Example 2
Source Project: hanlp-lucene-plugin   Source File: HanLPTokenizerTest.java    License: Apache License 2.0 6 votes vote down vote up
public void testMultiText() throws Exception
{
    // One tokenizer instance is reused across sentences; the reader is swapped
    // and the stream reset between runs.
    tokenizer = new HanLPTokenizer(
            HanLP.newSegment().enableJapaneseNameRecognize(true).enableIndexMode(true),
            null, false);
    for (String sentence : new String[]{"中华人民共和国", "地大物博"})
    {
        tokenizer.setReader(new StringReader(sentence));
        tokenizer.reset();
        testIncrementToken();
        tokenizer.close();
    }
}
 
Example 3
Source Project: jstarcraft-nlp   Source File: HanLpTokenizerFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the factory from the tokenizer configuration.
 *
 * @param configuration options parsed from the XML schema definition
 */
public HanLpTokenizerFactory(Map<String, String> configuration) {
    super(configuration);
    // Segmentation toggles; index mode and the custom dictionary default to on.
    enableIndexMode = getBoolean(configuration, "enableIndexMode", true);
    enableCustomDictionary = getBoolean(configuration, "enableCustomDictionary", true);
    enableCustomDictionaryForcing = getBoolean(configuration, "enableCustomDictionaryForcing", true);
    enableNumberQuantifierRecognize = getBoolean(configuration, "enableNumberQuantifierRecognize", false);
    // Named-entity recognition toggles, all off by default.
    enableNameRecognize = getBoolean(configuration, "enableNameRecognize", false);
    enableTranslatedNameRecognize = getBoolean(configuration, "enableTranslatedNameRecognize", false);
    enableJapaneseNameRecognize = getBoolean(configuration, "enableJapaneseNameRecognize", false);
    enableOrganizationRecognize = getBoolean(configuration, "enableOrganizationRecognize", false);
    enablePlaceRecognize = getBoolean(configuration, "enablePlaceRecognize", false);
    enableTraditionalChineseMode = getBoolean(configuration, "enableTraditionalChineseMode", false);
    // NOTE(review): the settings below mutate global HanLP state shared by all factories.
    HanLP.Config.Normalization = getBoolean(configuration, "enableNormalization", HanLP.Config.Normalization);
    algorithm = getString(configuration, "algorithm", "viterbi");
    Set<String> customPaths = getSet(configuration, "customDictionaryPath");
    if (customPaths != null) {
        HanLP.Config.CustomDictionaryPath = customPaths.toArray(new String[0]);
    }
    String stopWordsPath = get(configuration, "stopWordDictionaryPath");
    if (stopWordsPath != null) {
        stopWordDictionary = new TreeSet<>();
        stopWordDictionary.addAll(IOUtil.readLineListWithLessMemory(stopWordsPath));
    }
    if (getBoolean(configuration, "enableDebug", false)) {
        HanLP.Config.enableDebug();
    }
}
 
Example 4
Source Project: jstarcraft-nlp   Source File: HanLpSegmentFactory.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public Segment build(Map<String, String> configurations) {
    // Segmentation algorithm, e.g. "viterbi" (default).
    Segment segment = HanLP.newSegment(get(configurations, "algorithm", "viterbi"));

    // Index mode emits overlapping subterms suitable for search indexing.
    segment.enableIndexMode(getBoolean(configurations, "enableIndexMode", false));
    // Offsets are required so downstream tokenizers can report term positions.
    segment.enableOffset(true);

    // Optional recognizers, all disabled unless explicitly configured.
    segment.enableNumberQuantifierRecognize(getBoolean(configurations, "enableNumberQuantifierRecognize", false));
    segment.enableNameRecognize(getBoolean(configurations, "enableNameRecognize", false));
    // TODO: should translated/Japanese name recognition depend on enableNameRecognize?
    segment.enableTranslatedNameRecognize(getBoolean(configurations, "enableTranslatedNameRecognize", false));
    segment.enableJapaneseNameRecognize(getBoolean(configurations, "enableJapaneseNameRecognize", false));
    segment.enableOrganizationRecognize(getBoolean(configurations, "enableOrganizationRecognize", false));
    segment.enablePlaceRecognize(getBoolean(configurations, "enablePlaceRecognize", false));
    return segment;
}
 
Example 5
Source Project: jstarcraft-nlp   Source File: HanLpTokenizerTestCase.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testMultiText() throws Exception {
    tokenizer = new HanLpTokenizer(HanLP.newSegment().enableJapaneseNameRecognize(true).enableIndexMode(true), null);
    // Reuse one tokenizer across sentences, resetting the stream between reads.
    for (String sentence : new String[] { "中华人民共和国", "地大物博" }) {
        tokenizer.setReader(new StringReader(sentence));
        tokenizer.reset();
        testIncrementToken();
        tokenizer.close();
    }
}
 
Example 6
Source Project: jstarcraft-nlp   Source File: HanlpSegmenterTestCase.java    License: Apache License 2.0 5 votes vote down vote up
@Override
protected Tokenizer getSegmenter() {
    // Build a HanLP segment that reports term offsets, wrapped as a Lucene tokenizer.
    Segment segment = HanLP.newSegment();
    segment.enableOffset(true);
    // Collections.emptySet() replaces the raw-typed Collections.EMPTY_SET constant,
    // avoiding an unchecked-assignment warning while passing the same empty set.
    return new HanLpTokenizer(segment, Collections.emptySet());
}
 
Example 7
Source Project: dk-fitting   Source File: Nlputil.java    License: Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) {
    // Direct HanLP segmentation vs. this wrapper's segment() method.
    System.out.println(HanLP.segment("你好,欢迎使用HanLP汉语处理包!"));

    String segmented = new Nlputil().segment("商品和服务");
    System.out.println("s->" + segmented);
}
 
Example 8
Source Project: dk-fitting   Source File: Nlputil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts keywords from the given text.
 *
 * @param txt    text to analyze; may be null
 * @param keySum number of keywords to extract
 * @return keywords in reverse extraction order, each followed by a comma;
 *         the string "[]" when txt is null or keySum is not positive
 */
public static String extractKeyword(String txt, int keySum){
    if (txt == null || keySum <= 0){
        return String.valueOf(Collections.emptyList());
    }
    List<String> keywords = HanLP.extractKeyword(txt, keySum);
    // Walk backwards so the output matches the historical "last keyword first" order.
    StringBuilder joined = new StringBuilder();
    for (int i = keywords.size() - 1; i >= 0; i--) {
        joined.append(keywords.get(i)).append(',');
    }
    return joined.toString();
}
 
Example 9
Source Project: dk-fitting   Source File: Nlputil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts phrases from the given text.
 *
 * @param txt   text to analyze; may be null
 * @param phSum number of phrases to extract
 * @return phrases in reverse extraction order, each followed by a comma;
 *         the string "[]" when txt is null or phSum is not positive
 */
public static String extractPhrase(String txt, int phSum) {
    if (txt == null || phSum <= 0){
        return String.valueOf(Collections.emptyList());
    }
    List<String> phrases = HanLP.extractPhrase(txt, phSum);
    // Walk backwards so the output matches the historical "last phrase first" order.
    StringBuilder joined = new StringBuilder();
    for (int i = phrases.size() - 1; i >= 0; i--) {
        joined.append(phrases.get(i)).append(',');
    }
    return joined.toString();
}
 
Example 10
Source Project: dk-fitting   Source File: Nlputil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Produces an automatic summary of the given text.
 *
 * @param txt  text to summarize; may be null
 * @param sSum number of summary sentences to extract
 * @return summary sentences in reverse extraction order, each followed by a comma;
 *         the string "[]" when txt is null or sSum is not positive
 */
public static String extractSummary(String txt, int sSum){
    if (txt == null || sSum <= 0){
        return String.valueOf(Collections.emptyList());
    }
    List<String> sentences = HanLP.extractSummary(txt, sSum);
    // Walk backwards so the output matches the historical "last sentence first" order.
    StringBuilder joined = new StringBuilder();
    for (int i = sentences.size() - 1; i >= 0; i--) {
        joined.append(sentences.get(i)).append(',');
    }
    return joined.toString();
}
 
Example 11
Source Project: dk-fitting   Source File: Nlputil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a sentence to a comma-separated pinyin string.
 *
 * @param txt sentence to convert; may be null
 * @return pinyin tokens in reverse order, each followed by a comma;
 *         the string "[]" when txt is null
 */
public static String convertToPinyinList(String txt){
    if (txt == null){
        return String.valueOf(Collections.emptyList());
    }
    List<Pinyin> pinyins = HanLP.convertToPinyinList(txt);
    // Walk backwards so the output matches the historical "last syllable first" order.
    StringBuilder joined = new StringBuilder();
    for (int i = pinyins.size() - 1; i >= 0; i--) {
        joined.append(pinyins.get(i)).append(',');
    }
    return joined.toString();
}
 
Example 12
Source Project: dk-fitting   Source File: DKNLPBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a sentence to its pinyin representation.
 *
 * @param txt sentence to convert; may be null
 * @return pinyin tokens, or an empty list when txt is null
 */
public static List<Pinyin> convertToPinyinList(String txt)
{
    if (txt == null)
    {
        return Collections.emptyList();
    }
    return HanLP.convertToPinyinList(txt);
}
 
Example 13
Source Project: rebuild   Source File: Field2Schema.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Chinese -> pinyin identifier (letters and digits only).
 * Pure ASCII letter/digit input is returned with at most a "rb" prefix added;
 * characters that cannot be romanized are dropped, with random/timestamp
 * fallbacks when the input is too short or nothing convertible remains.
 *
 * @param text source text, typically a Chinese display name
 * @return a lower-case identifier starting with an ASCII letter
 * @throws ModifiyMetadataException if the final result is not a valid identifier
 */
protected String toPinyinName(final String text) {
	String identifier = text;
	// Pad very short input so the generated name is not too terse.
	if (text.length() < 4) {
		identifier = "rb" + text + RandomUtils.nextInt(10);
	}

	// Already plain ASCII letters/digits: only guard against a non-letter first
	// character, blacklisted words and SQL keywords.
	if (identifier.matches("[a-zA-Z0-9]+")) {
		boolean needsPrefix = !CharSet.ASCII_ALPHA.contains(identifier.charAt(0))
				|| BlackList.isBlack(identifier) || BlackList.isSQLKeyword(identifier);
		return needsPrefix ? "rb" + identifier : identifier;
	}

	// Romanize, then strip everything that is not an ASCII letter or digit.
	identifier = HanLP.convertToPinyinString(identifier, "", false)
			.replaceAll("[^a-zA-Z0-9]", "");
	if (StringUtils.isBlank(identifier)) {
		// Nothing convertible survived; fall back to a timestamp-based name.
		identifier = String.valueOf(System.currentTimeMillis() / 1000);
	}

	if (!CharSet.ASCII_ALPHA.contains(identifier.charAt(0))) {
		identifier = "rb" + identifier;
	}

	identifier = identifier.toLowerCase();
	if (identifier.length() > 42) {
		identifier = identifier.substring(0, 42);
	}

	if (!StringHelper.isIdentifier(identifier)) {
		throw new ModifiyMetadataException("无效名称 : " + text);
	}
	return identifier;
}
 
Example 14
/**
 * Extracts Japanese person names from the given sentences.
 *
 * @param str sentences to scan
 * @return words whose term string contains the "nrj" (Japanese name) tag
 */
public static List<String> JapaneseName(String[] str) {
	List<String> names = new ArrayList<String>();
	Segment segment = HanLP.newSegment().enableJapaneseNameRecognize(true);
	for (String sentence : str) {
		for (Term term : segment.seg(sentence)) {
			// NOTE(review): toString() is "word/nature", so a word whose text
			// contains "nrj" would also match — confirm this is intended.
			if (term.toString().contains("nrj")) {
				names.add(term.word);
			}
		}
	}
	return names;
}
 
Example 15
/**
 * Extracts organization names from the given sentences.
 *
 * @param str sentences to scan
 * @return words whose term string contains the "nt" (organization) tag
 */
public static List<String> Organization(String[] str) {
	List<String> organizations = new ArrayList<String>();
	Segment segment = HanLP.newSegment().enableOrganizationRecognize(true);
	for (String sentence : str) {
		for (Term term : segment.seg(sentence)) {
			// NOTE(review): toString() is "word/nature", so any word or subtag
			// containing "nt" also matches — confirm this is intended.
			if (term.toString().contains("nt")) {
				organizations.add(term.word);
			}
		}
	}
	return organizations;
}
 
Example 16
/**
 * Extracts Chinese person names from the given sentences.
 *
 * @param str sentences to scan
 * @return words whose term string contains the "nr" (person name) tag
 */
public static List<String> ChineseName(String[] str) {
	List<String> names = new ArrayList<String>();
	Segment segment = HanLP.newSegment().enableNameRecognize(true);
	for (String sentence : str) {
		for (Term term : segment.seg(sentence)) {
			// NOTE(review): "nr" is a prefix of "nrf"/"nrj", and toString() is
			// "word/nature" — this also matches those tags and word text; confirm.
			if (term.toString().contains("nr")) {
				names.add(term.word);
			}
		}
	}
	return names;
}
 
Example 17
/**
 * Extracts transliterated (foreign) person names from the given sentences.
 *
 * @param str sentences to scan
 * @return words whose term string contains the "nrf" (transliterated name) tag
 */
public static List<String> TranslatedName(String[] str) {
	List<String> names = new ArrayList<String>();
	Segment segment = HanLP.newSegment().enableTranslatedNameRecognize(true);
	for (String sentence : str) {
		for (Term term : segment.seg(sentence)) {
			// NOTE(review): toString() is "word/nature", so a word whose text
			// contains "nrf" would also match — confirm this is intended.
			if (term.toString().contains("nrf")) {
				names.add(term.word);
			}
		}
	}
	return names;
}
 
Example 18
Source Project: o2oa   Source File: BaseAction.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
protected List<String> keys(String key) {
	// Segment the key and collect lower-cased words for indexing.
	List<String> words = new ArrayList<>();
	for (Term term : HanLP.segment(key)) {
		String word = term.word;
		// Skip overlong tokens to keep index fields small.
		if (StringUtils.length(word) < 31) {
			words.add(StringUtils.lowerCase(word));
		}
	}
	return words;
}
 
Example 19
Source Project: elasticsearch-analysis-hanlp   Source File: ExtMonitor.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public void run() {
    // Compare the cached custom-dictionary file list against what the current
    // configuration points to; reload HanLP's custom dictionary on any change.
    List<DictionaryFile> cachedFiles = DictionaryFileCache.getCustomDictionaryFileList();
    logger.debug("hanlp original custom dictionary: {}", Arrays.toString(cachedFiles.toArray()));
    reloadProperty();
    List<DictionaryFile> currentFiles = getCurrentDictionaryFileList(HanLP.Config.CustomDictionaryPath);
    logger.debug("hanlp current custom dictionary: {}", Arrays.toString(currentFiles.toArray()));

    boolean modified = false;
    for (DictionaryFile file : currentFiles) {
        if (!cachedFiles.contains(file)) {
            modified = true;
            break;
        }
    }
    if (!modified) {
        logger.info("hanlp custom dictionary isn't modified, so no need reload");
        return;
    }

    logger.info("reloading hanlp custom dictionary");
    try {
        // Reload runs privileged because it touches files outside the sandbox.
        AccessController.doPrivileged((PrivilegedAction) CustomDictionaryUtility::reload);
    } catch (Exception e) {
        logger.error("can not reload hanlp custom dictionary", e);
    }
    DictionaryFileCache.setCustomDictionaryFileList(currentFiles);
    DictionaryFileCache.writeCache();
    logger.info("finish reload hanlp custom dictionary");
}
 
Example 20
Source Project: elasticsearch-analysis-hanlp   Source File: ExtMonitor.java    License: Apache License 2.0 5 votes vote down vote up
private void reloadProperty() {
    // Re-reads hanlp.properties and rebuilds HanLP.Config.CustomDictionaryPath,
    // resolving each entry against the configured root directory.
    Properties p = new Properties();
    try {
        ClassLoader loader = AccessController.doPrivileged((PrivilegedAction<ClassLoader>) () -> Thread.currentThread().getContextClassLoader());
        if (loader == null) {
            loader = HanLP.Config.class.getClassLoader();
        }
        // try-with-resources closes the reader (and underlying stream); the
        // original leaked it on every invocation of this monitor.
        try (InputStreamReader reader = new InputStreamReader(
                Predefine.HANLP_PROPERTIES_PATH == null
                        ? loader.getResourceAsStream("hanlp.properties")
                        : new FileInputStream(Predefine.HANLP_PROPERTIES_PATH), "UTF-8")) {
            p.load(reader);
        }
        String root = p.getProperty("root", "").replaceAll("\\\\", "/");
        if (root.length() > 0 && !root.endsWith("/")) {
            root += "/";
        }
        String[] pathArray = p.getProperty("CustomDictionaryPath", "data/dictionary/custom/CustomDictionary.txt").split(";");
        // Entries beginning with a space are resolved relative to the directory
        // of the previous entry; others are resolved against root.
        String prePath = root;
        for (int i = 0; i < pathArray.length; ++i) {
            if (pathArray[i].startsWith(" ")) {
                pathArray[i] = prePath + pathArray[i].trim();
            } else {
                pathArray[i] = root + pathArray[i];
                int lastSplash = pathArray[i].lastIndexOf('/');
                if (lastSplash != -1) {
                    prePath = pathArray[i].substring(0, lastSplash + 1);
                }
            }
        }
        // Privileged write: HanLP.Config is global state.
        AccessController.doPrivileged((PrivilegedAction) () -> HanLP.Config.CustomDictionaryPath = pathArray);
    } catch (Exception e) {
        logger.error("can not find hanlp.properties", e);
    }
}
 
Example 21
/**
 * Deletes the custom-dictionary binary cache and reloads the main dictionary.
 *
 * @return true when the main dictionary was reloaded successfully,
 *         false when no custom dictionary path is configured
 */
public static boolean reload() {
    // NOTE(review): return value discarded — presumably this forces
    // CustomDictionary's static initialization before the cache is deleted;
    // TODO confirm the call is still required.
    CustomDictionary.dat.getSize();
    String[] paths = HanLP.Config.CustomDictionaryPath;
    if (paths == null || paths.length == 0) {
        return false;
    }
    // Remove the binary cache next to the first dictionary so it is rebuilt.
    logger.debug("begin delete hanlp custom dictionary cache");
    IOUtil.deleteFile(paths[0] + Predefine.BIN_EXT);
    logger.debug("delete hanlp custom dictionary cache successfully");
    return loadMainDictionary(paths[0]);
}
 
Example 22
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    // Prefer the CRF lexical analyzer; fall back to HanLP's default segment
    // when the CRF model cannot be loaded.
    Segment segment = AccessController.doPrivileged((PrivilegedAction<Segment>) () -> {
        try {
            return new CRFLexicalAnalyzer();
        } catch (IOException e) {
            logger.error("can not use crf analyzer, provider default", e);
            return HanLP.newSegment();
        }
    });
    return new Analyzer.TokenStreamComponents(TokenizerBuilder.tokenizer(segment, configuration));
}
 
Example 23
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    // Prefer the perceptron lexical analyzer; fall back to HanLP's default
    // segment when its model cannot be loaded.
    Segment segment = AccessController.doPrivileged((PrivilegedAction<Segment>) () -> {
        try {
            return new PerceptronLexicalAnalyzer();
        } catch (IOException e) {
            logger.error("can not use nlp analyzer, provider default", e);
            return HanLP.newSegment();
        }
    });
    return new Analyzer.TokenStreamComponents(TokenizerBuilder.tokenizer(segment, configuration));
}
 
Example 24
@Test
public void test1() {
    // Wrap a multi-line input; next() should skip blank lines while offsets
    // keep advancing across the whole reader.
    SegmentWrapper wrapper = new SegmentWrapper(
            new StringReader("张三\n\n\n新买的手机"),
            HanLP.newSegment().enableOffset(true));
    for (Term term = wrapper.next(); term != null; term = wrapper.next()) {
        System.out.println(term.word + "\t" + term.nature + "\t" + term.offset + "\t" + term.length());
    }
}
 
Example 25
/**
 * Initializes the factory from the tokenizer configuration.
 *
 * @param args configuration options parsed from the XML schema
 */
public HanLPTokenizerFactory(Map<String, String> args) {
    super(args);
    // Segmentation behaviour toggles; index mode defaults to on for search use.
    enableIndexMode = getBoolean(args, "enableIndexMode", true);
    enablePorterStemming = getBoolean(args, "enablePorterStemming", false);
    enableNumberQuantifierRecognize = getBoolean(args, "enableNumberQuantifierRecognize", false);
    enableCustomDictionary = getBoolean(args, "enableCustomDictionary", true);
    // Named-entity recognition toggles, all off by default.
    enableTranslatedNameRecognize = getBoolean(args, "enableTranslatedNameRecognize", false);
    enableJapaneseNameRecognize = getBoolean(args, "enableJapaneseNameRecognize", false);
    enableOrganizationRecognize = getBoolean(args, "enableOrganizationRecognize", false);
    enableNameRecognize = getBoolean(args, "enableNameRecognize", false);
    enablePlaceRecognize = getBoolean(args, "enablePlaceRecognize", false);
    enableTraditionalChineseMode = getBoolean(args, "enableTraditionalChineseMode", false);
    // NOTE(review): the settings below mutate global HanLP state shared by all factories.
    HanLP.Config.Normalization = getBoolean(args, "enableNormalization", HanLP.Config.Normalization);
    Set<String> customDictionaryPathSet = getSet(args, "customDictionaryPath");
    if (customDictionaryPathSet != null) {
        HanLP.Config.CustomDictionaryPath = customDictionaryPathSet.toArray(new String[0]);
    }
    String stopWordDictionaryPath = get(args, "stopWordDictionaryPath");
    if (stopWordDictionaryPath != null) {
        stopWordDictionary = new TreeSet<>();
        stopWordDictionary.addAll(IOUtil.readLineListWithLessMemory(stopWordDictionaryPath));
    }
    if (getBoolean(args, "enableDebug", false)) {
        HanLP.Config.enableDebug();
    }
}
 
Example 26
Source Project: similarity   Source File: Tokenizer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Segments a sentence with HanLP and adapts each term to this project's
 * {@code Word} type (word text plus nature/part-of-speech name).
 *
 * @param sentence text to segment
 * @return segmented words in sentence order
 */
public static List<Word> segment(String sentence) {
    // Dead, commented-out ansj/Xmnlp alternatives removed; see VCS history.
    return HanLP.segment(sentence)
            .stream()
            .map(term -> new Word(term.word, term.nature.name()))
            .collect(Collectors.toList());
}
 
Example 27
Source Project: similarity   Source File: PinyinDictionaryTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void getXmnlpPinyin() throws Exception {
    // Heteronym handling: 胳臂 / 划船 / 计划 exercise different readings
    // of the same characters.
    String pinyin1 = HanLP.convertToPinyinList("胳臂").toString();
    System.out.println("胳臂:" + pinyin1);

    System.out.println("划船:" + HanLP.convertToPinyinList("划船").toString());
    System.out.println("计划:" + HanLP.convertToPinyinList("计划").toString());
}
 
Example 28
Source Project: similarity   Source File: Word2vecTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void trainModel() throws Exception {
    HanLP.Config.ShowTermNature = false; // suppress part-of-speech tags in segmented output
    // Segment the raw corpus to a split file, then train word2vec on it.
    Tokenizer.fileSegment(SEGMENT, RAW_CORPUS, RAW_CORPUS_SPLIT);
    String modelPath = Word2vec.trainModel(RAW_CORPUS_SPLIT, RAW_CORPUS_SPLIT_MODEL);
    System.out.println("outputModelPath:" + modelPath);
}
 
Example 29
Source Project: hanlp-lucene-plugin   Source File: HanLPTokenizerFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the factory from the tokenizer configuration.
 *
 * @param args configuration options parsed from the XML schema
 */
public HanLPTokenizerFactory(Map<String, String> args)
{
    super(args);
    // Segmentation behaviour toggles; index mode defaults to on for search use.
    enableIndexMode = getBoolean(args, "enableIndexMode", true);
    enablePorterStemming = getBoolean(args, "enablePorterStemming", false);
    enableNumberQuantifierRecognize = getBoolean(args, "enableNumberQuantifierRecognize", false);
    enableCustomDictionary = getBoolean(args, "enableCustomDictionary", true);
    enableCustomDictionaryForcing = getBoolean(args, "enableCustomDictionaryForcing", false);
    // Named-entity recognition toggles, all off by default.
    enableTranslatedNameRecognize = getBoolean(args, "enableTranslatedNameRecognize", false);
    enableJapaneseNameRecognize = getBoolean(args, "enableJapaneseNameRecognize", false);
    enableOrganizationRecognize = getBoolean(args, "enableOrganizationRecognize", false);
    enableNameRecognize = getBoolean(args, "enableNameRecognize", false);
    enablePlaceRecognize = getBoolean(args, "enablePlaceRecognize", false);
    enableTraditionalChineseMode = getBoolean(args, "enableTraditionalChineseMode", false);
    // NOTE(review): the settings below mutate global HanLP state shared by all factories.
    HanLP.Config.Normalization = getBoolean(args, "enableNormalization", HanLP.Config.Normalization);
    algorithm = getString(args, "algorithm", "viterbi");
    Set<String> customDictionaryPathSet = getSet(args, "customDictionaryPath");
    if (customDictionaryPathSet != null)
    {
        HanLP.Config.CustomDictionaryPath = customDictionaryPathSet.toArray(new String[0]);
    }
    String stopWordDictionaryPath = get(args, "stopWordDictionaryPath");
    if (stopWordDictionaryPath != null)
    {
        stopWordDictionary = new TreeSet<>();
        stopWordDictionary.addAll(IOUtil.readLineListWithLessMemory(stopWordDictionaryPath));
    }
    if (getBoolean(args, "enableDebug", false))
    {
        HanLP.Config.enableDebug();
    }
}
 
Example 30
Source Project: hanlp-lucene-plugin   Source File: HanLPAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Lucene analysis chain: a HanLP tokenizer with offsets enabled,
 * wired with the configured stop-word filter and stemming flag.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName)
{
    return new TokenStreamComponents(
            new HanLPTokenizer(HanLP.newSegment().enableOffset(true), filter, enablePorterStemming));
}