Java Code Examples for com.hankcs.hanlp.corpus.io.IOUtil#newInputStream()

The following examples show how to use com.hankcs.hanlp.corpus.io.IOUtil#newInputStream(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Nlputil.java    From dk-fitting with Apache License 2.0 6 votes vote down vote up
/**
 * Adds every line of a word-list file to the custom dictionary.
 *
 * <p>The file is opened through {@code IOUtil.newInputStream(filePath)} and decoded with
 * {@code encoding}. Each line read is passed to {@code CustomDictionary.insert} while
 * holding {@code lockCustomDictionary}.
 *
 * @param filePath path of the new word-list file, one word per line
 * @param encoding name of the character encoding used to decode the file
 * @return the fixed success message {@code "添加成功"} when the file was processed;
 *         a parameter-error message when either argument is {@code null};
 *         otherwise the text produced by {@code TextUtility.exceptionToString} for the failure
 */
public static String addCK(String filePath, String encoding)
{
    if (filePath == null || encoding == null) return String.format("参数错误:addCK(%s, %s)", filePath, encoding);
    // try-with-resources guarantees the reader is closed even when reading or inserting throws;
    // previously the reader leaked on every failure path.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(filePath), encoding)))
    {
        String line;
        // Hold the lock for the whole file so the batch of inserts is not interleaved
        // with other code synchronizing on the same lock.
        synchronized (lockCustomDictionary)
        {
            while ((line = br.readLine()) != null)
            {
                CustomDictionary.insert(line);
            }
        }
    }
    catch (Exception e)
    {
        System.out.println(e);
        return TextUtility.exceptionToString(e);
    }

    return "添加成功";
}
 
Example 2
Source File: DKNLPBase.java    From dk-fitting with Apache License 2.0 6 votes vote down vote up
/**
 * Adds every line of a word-list file to the custom dictionary.
 *
 * <p>The file is opened through {@code IOUtil.newInputStream(filePath)} and decoded with
 * {@code encoding}. Each line read is passed to {@code CustomDictionary.insert} while
 * holding {@code lockCustomDictionary}.
 *
 * @param filePath path of the new word-list file, one word per line
 * @param encoding name of the character encoding used to decode the file
 * @return {@code null} when the file was processed without error;
 *         a parameter-error message when either argument is {@code null};
 *         otherwise the text produced by {@code TextUtility.exceptionToString} for the failure
 */
public static String addCK(String filePath, String encoding)
{
    if (filePath == null || encoding == null) return String.format("参数错误:addCK(%s, %s)", filePath, encoding);
    // try-with-resources guarantees the reader is closed even when reading or inserting throws;
    // previously the reader leaked on every failure path.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(filePath), encoding)))
    {
        String line;
        // Hold the lock for the whole file so the batch of inserts is not interleaved
        // with other code synchronizing on the same lock.
        synchronized (lockCustomDictionary)
        {
            while ((line = br.readLine()) != null)
            {
                CustomDictionary.insert(line);
            }
        }
    }
    catch (Exception e)
    {
        return TextUtility.exceptionToString(e);
    }

    return null;
}
 
Example 3
Source File: CustomDictionaryUtility.java    From elasticsearch-analysis-hanlp with Apache License 2.0 4 votes vote down vote up
/**
 * Loads a user dictionary file and appends its entries to {@code map}.
 *
 * <p>Each line is split on whitespace, or on commas when {@code path} ends with
 * {@code ".csv"}. The first field is the word; the remaining fields are read as
 * (nature, frequency) pairs. A word with no such pair gets {@code defaultNature}.
 *
 * @param path                  dictionary path, opened through {@code IOUtil.newInputStream}
 * @param defaultNature         nature assigned to words that carry no explicit nature
 * @param map                   destination map; entries are put under the (optionally normalized) word
 * @param customNatureCollector passed to {@code LexiconUtility.convertStringToNature} to collect user-defined natures
 * @return {@code true} if the whole file was read; {@code false} if any exception occurred
 *         (entries put before the failure remain in {@code map})
 */
private static boolean load(String path, Nature defaultNature, TreeMap<String, CoreDictionary.Attribute> map, LinkedHashSet<Nature> customNatureCollector) {
    String splitter = "\\s";
    if (path.endsWith(".csv")) {
        splitter = ",";
    }
    // try-with-resources closes the reader on every path, including parse failures.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(path), "UTF-8"))) {
        String line;
        boolean firstLine = true;
        while ((line = br.readLine()) != null) {
            if (firstLine) {
                // Only the first line can carry a UTF-8 byte-order mark.
                line = IOUtil.removeUTF8BOM(line);
                firstLine = false;
            }
            String[] param = line.split(splitter);
            // Skip empty lines. A line made up solely of separators splits into a
            // zero-length array, so check the length before touching param[0].
            if (param.length == 0 || param[0].length() == 0) {
                continue;
            }
            // Normalize the word when the global option is enabled.
            if (HanLP.Config.Normalization) {
                param[0] = CharTable.convert(param[0]);
            }
            // Fields after the word come in (nature, frequency) pairs.
            int natureCount = (param.length - 1) / 2;
            CoreDictionary.Attribute attribute;
            if (natureCount == 0) {
                attribute = new CoreDictionary.Attribute(defaultNature);
            } else {
                attribute = new CoreDictionary.Attribute(natureCount);
                for (int i = 0; i < natureCount; ++i) {
                    attribute.nature[i] = LexiconUtility.convertStringToNature(param[1 + 2 * i], customNatureCollector);
                    attribute.frequency[i] = Integer.parseInt(param[2 + 2 * i]);
                    attribute.totalFrequency += attribute.frequency[i];
                }
            }
            map.put(param[0], attribute);
        }
    } catch (Exception e) {
        logger.error("hanlp custom dictionary [{}] read failed!", path, e);
        return false;
    }
    return true;
}