Java Code Examples for org.nlpcn.commons.lang.util.StringUtil#isBlank()

The following examples show how to use org.nlpcn.commons.lang.util.StringUtil#isBlank(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: PinyinFormatter.java    From nlp-lang with Apache License 2.0 6 votes vote down vote up
/**
 * Formats a pinyin string according to the requested output type.
 *
 * @param pinyinStr the pinyin string to format; when StringUtil.isBlank reports it as
 *                  blank it is returned unchanged
 * @param type      the target format
 * @return the formatted string; the input itself for DEFAULT_PINYIN_FORMAT and for any
 *         type without a dedicated branch
 */
public static String formatPinyin(String pinyinStr, TYPE type) {
	if (StringUtil.isBlank(pinyinStr)) {
		return pinyinStr;
	}

	switch (type) {
		case UNICODE_PINYIN_FORMAT:
			// Conversion is delegated entirely to convertToneNumber2ToneMark.
			return convertToneNumber2ToneMark(pinyinStr);
		case WITHOUT_NUM_PINYIN_FORMAT:
			// Drop every digit in the range 1-5.
			return pinyinStr.replaceAll("[1-5]", "");
		case FIRST_CHAR_PINYIN_FORMAT:
			// Only the first character of the input is kept.
			return String.valueOf(pinyinStr.charAt(0));
		case DEFAULT_PINYIN_FORMAT:
		default:
			return pinyinStr;
	}
}
 
Example 2
Source File: MemoryIndex.java    From nlp-lang with Apache License 2.0 6 votes vote down vote up
/**
 * Collects every non-empty prefix of each field.
 *
 * Fields that StringUtil.isBlank reports as blank are skipped; the remaining fields
 * are trimmed before their prefixes are taken.
 *
 * @param fields the strings to expand into prefixes
 * @return a set holding, for each usable field, all prefixes of length 1 up to the
 *         trimmed field's full length
 */
private Set<String> getPrexSplit(final String[] fields) {
	final HashSet<String> prefixes = new HashSet<String>();
	for (final String field : fields) {
		if (!StringUtil.isBlank(field)) {
			final String trimmed = field.trim();
			final int length = trimmed.length();
			for (int end = 1; end <= length; end++) {
				prefixes.add(trimmed.substring(0, end));
			}
		}
	}
	return prefixes;
}
 
Example 3
Source File: MemoryIndex.java    From nlp-lang with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the values indexed under the given key.
 *
 * @param key the lookup key
 * @return an immutable empty list when the key is blank; otherwise a new list with the
 *         values of the entries stored under the key, in the stored set's iteration
 *         order (empty when the key is not indexed)
 */
public List<T> suggest(String key) {
	if (StringUtil.isBlank(key)) {
		return Collections.emptyList();
	}

	// NOTE(review): String.replace matches its first argument literally, so this removes
	// the two-character sequence backslash + 's' and does NOT strip whitespace. If
	// whitespace removal was intended this should be a regex replace, but indexed keys
	// may themselves contain whitespace, so confirm the intent before changing it.
	final String lookupKey = key.replace("\\s", "");

	final List<T> values = new LinkedList<T>();
	final TreeSet<Entry> entries = index.get(lookupKey);
	if (entries != null) {
		for (final Entry entry : entries) {
			values.add(entry.t);
		}
	}
	return values;
}
 
Example 4
Source File: DoubleArrayTire.java    From nlp-lang with Apache License 2.0 5 votes vote down vote up
/**
 * Gets the item stored for a word.
 *
 * The first character indexes directly into dat; every following character is
 * resolved through the current item's base offset and accepted only if the target
 * slot's check value is -1 or equals the index of the item it was reached from.
 *
 * @param str the word to look up
 * @return the item reached after consuming all characters, or null when the word is
 *         blank, a character maps outside dat, a slot on the path is empty, or a
 *         check value does not match
 */
@SuppressWarnings("unchecked")
public <T extends Item> T getItem(String str) {
	if (StringUtil.isBlank(str)) {
		return null;
	}

	// Guard the first lookup the same way the loop below guards the later ones, so a
	// character beyond the array yields null instead of an ArrayIndexOutOfBoundsException.
	final char first = str.charAt(0);
	if (first > dat.length - 1) {
		return null;
	}

	Item item = dat[first];
	if (str.length() == 1) {
		return (T) item;
	}
	if (item == null) {
		return null;
	}

	for (int i = 1; i < str.length(); i++) {
		// Index of the item we are leaving; the next slot must point back to it.
		final int checkValue = item.index;
		final int next = item.base + str.charAt(i);
		if (next > dat.length - 1) {
			return null;
		}

		item = dat[next];
		if (item == null) {
			return null;
		}
		if (item.check != -1 && item.check != checkValue) {
			return null;
		}
	}
	return (T) item;
}
 
Example 5
Source File: DATMaker.java    From nlp-lang with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a user-defined DAT from a dictionary file.
 *
 * Each non-blank line is split on tab characters; a fresh instance of cla is
 * initialised with the resulting columns and added to an intermediate forest under
 * the first column. The forest is then converted via tree2List and makeDAT.
 *
 * @param dicPath path of the dictionary file, opened through IOUtil.instanceFileIterator
 * @param cla     item class instantiated once per dictionary line
 * @throws FileNotFoundException  if no iterator could be obtained for dicPath
 * @throws InstantiationException if cla cannot be instantiated
 * @throws IllegalAccessException if the constructor of cla is not accessible
 */
public void maker(final String dicPath, final Class<? extends Item> cla) throws FileNotFoundException, InstantiationException, IllegalAccessException {
	long start = System.currentTimeMillis();
	LOG.info("make basic tire begin !");

	final SmartForest<Item> forest = new SmartForest<Item>();
	final FileIterator lines = IOUtil.instanceFileIterator(dicPath, IOUtil.UTF8);
	if (lines == null) {
		throw new FileNotFoundException();
	}
	try {
		while (lines.hasNext()) {
			final String line = lines.next();
			if (!StringUtil.isBlank(line)) {
				final Item item = cla.newInstance();
				final String[] columns = line.split("\t");
				item.init(columns);
				forest.add(columns[0], item);
			}
		}
	} finally {
		// Always release the iterator, even if a line fails to load.
		lines.close();
	}
	final long forestMillis = System.currentTimeMillis() - start;
	LOG.info("make basic tire over use time " + forestMillis + " ms");

	start = System.currentTimeMillis();
	LOG.info("make dat tire begin !");
	makeDAT(tree2List(cla, forest));
	final long datMillis = System.currentTimeMillis() - start;
	LOG.info("make dat tire over use time " + datMillis + " ms! dat len is " + datArrLen() + "! dat size is " + datItemSize());
}
 
Example 6
Source File: MemoryIndex.java    From nlp-lang with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a search-suggestion entry to the index.
 *
 * The fields are expanded into lookup keys according to the configured model
 * (getAllSplit for ALL, getPrexSplit for PREX). The value is stored under every
 * non-blank key, and each key's set is trimmed so that it never holds more than
 * this.size entries.
 *
 * @param value  the content to return for a matching suggestion
 * @param score  the score of the entry, passed through score(value, score)
 * @param fields the suggestion texts to index; when null or empty, value.toString()
 *               is used as the only field
 */
public void addItem(T value, Double score, String... fields) {
	if (fields == null || fields.length == 0) {
		fields = new String[] { value.toString() };
	}

	Set<String> keys = null;
	switch (model) {
	case ALL:
		keys = getAllSplit(fields);
		break;
	case PREX:
		keys = getPrexSplit(fields);
		break;
	}

	for (final String key : keys) {
		if (!StringUtil.isBlank(key)) {
			TreeSet<Entry> bucket = index.get(key);
			if (bucket == null) {
				bucket = new TreeSet<Entry>();
				index.put(key, bucket);
			}
			bucket.add(new Entry(value, score(value, score)));

			// Enforce the per-key capacity by dropping the set's last element.
			if (bucket.size() > this.size) {
				bucket.pollLast();
			}
		}
	}
}
 
Example 7
Source File: MemoryIndex.java    From nlp-lang with Apache License 2.0 5 votes vote down vote up
/**
 * Collects every non-empty contiguous substring of each field.
 *
 * Fields that StringUtil.isBlank reports as blank are skipped; the remaining fields
 * are trimmed before being expanded.
 *
 * @param fields the strings to expand
 * @return a set with all substrings of every usable, trimmed field
 */
private Set<String> getAllSplit(final String[] fields) {
	final HashSet<String> substrings = new HashSet<String>();
	for (final String field : fields) {
		if (StringUtil.isBlank(field)) {
			continue;
		}
		final String text = field.trim();
		final int length = text.length();
		for (int begin = 0; begin < length; begin++) {
			for (int end = begin + 1; end <= length; end++) {
				substrings.add(text.substring(begin, end));
			}
		}
	}
	return substrings;
}
 
Example 8
Source File: Analysis.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next term. Meant to be called in a while loop: a null return signals
 * that segmentation has finished.
 *
 * Terms still queued from an earlier call are handed out first. Only when the queue
 * is empty is a new line read (blank lines are skipped) and passed to fullTerms.
 *
 * @return the next term, or null when the reader has no more lines or the line that
 *         was read left the queue empty
 * @throws IOException declared for the reads on the underlying reader
 */
public Term next() throws IOException {
    if (terms.isEmpty()) {
        String line = br.readLine();
        // NOTE(review): the offset is captured once, after the first read only; it is not
        // refreshed while blank lines are skipped below. Confirm against br.getStart().
        offe = br.getStart();
        while (StringUtil.isBlank(line)) {
            if (line == null) {
                return null;
            }
            line = br.readLine();
        }

        // Original note here: "ambiguity handling string". fullTerms is expected to
        // fill the terms queue from the line - confirm against its definition.
        fullTerms(line);

        if (terms.isEmpty()) {
            return null;
        }
    }

    Term term = terms.poll();
    term.updateOffe(offe);
    return term;
}
 
Example 9
Source File: CRFppTxtModel.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the feature-tag conversion table.
 *
 * Reads Config.TAG_NUM non-blank lines and maps the first character of each
 * ('S', 'B', 'M' or 'E') to the matching Config constant. The remaining slots of the
 * table are then derived from those first Config.TAG_NUM entries.
 *
 * @param br reader positioned at the tag section of the model
 * @return the conversion table, of length Config.TAG_NUM + Config.TAG_NUM * Config.TAG_NUM
 * @throws Exception if the stream ends before all tags were read, if a line starts
 *                   with an unknown tag character, or if reading fails
 */
private int[] loadTagCoven(BufferedReader br) throws Exception {

    int[] conver = new int[Config.TAG_NUM + Config.TAG_NUM * Config.TAG_NUM];

    // TODO: this procedure is hard-coded; if the tag set changes it has to be rewritten.
    int i = 0;
    while (i < Config.TAG_NUM) {
        String line = br.readLine();
        if (line == null) {
            // End of stream: without this check the blank-line skip below would spin forever.
            throw new Exception("unexpected end of model after " + i + " of " + Config.TAG_NUM + " tags");
        }
        if (StringUtil.isBlank(line)) {
            // Blank lines do not consume a tag slot.
            continue;
        }

        char c = line.charAt(0);
        switch (c) {
            case 'S':
                conver[i] = Config.S;
                break;
            case 'B':
                conver[i] = Config.B;
                break;
            case 'M':
                conver[i] = Config.M;
                break;
            case 'E':
                conver[i] = Config.E;
                break;
            default:
                throw new Exception("err tag named " + c + " in model line " + line);
        }
        i++;
    }

    // Derive the combined entries from the single-tag entries read above.
    // NOTE(review): the literal 4 presumably equals Config.TAG_NUM - confirm.
    for (int j = Config.TAG_NUM; j < conver.length; j++) {
        conver[j] = conver[(j - 4) / Config.TAG_NUM] * Config.TAG_NUM + conver[j % Config.TAG_NUM] + Config.TAG_NUM;
    }

    return conver;
}
 
Example 10
Source File: SplitWord.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Splits a line into words using the tag sequence produced by vterbi.
 *
 * An element with tag 0 becomes a word on its own. An element with tag 1 opens a
 * word that extends through the following elements up to and including the first
 * one with tag 3. Elements with any other tag outside such a run are skipped.
 * (Presumably 0/1/3 are the single/begin/end tags - confirm against the model.)
 *
 * @param line the text to segment
 * @return the words in order of appearance; an immutable empty list for blank input
 */
public List<String> cut(String line) {

    if (StringUtil.isBlank(line)) {
        return Collections.emptyList();
    }

    List<Element> elements = vterbi(line);

    List<String> result = new ArrayList<>();

    int begin = 0;
    int end = 0;
    final int last = elements.size() - 1;
    for (int i = 0; i < elements.size(); i++) {
        Element e = elements.get(i);
        switch (e.getTag()) {
            case 0:
                end += e.len;
                result.add(line.substring(begin, end));
                begin = end;
                break;
            case 1:
                end += e.len;
                // Consume elements up to and including the closing tag 3. Each consumed
                // element's length is added exactly once, so a run that reaches the end of
                // the list without a tag 3 no longer counts its last element twice (which
                // pushed 'end' past the end of the line and made substring throw).
                while (i < last) {
                    e = elements.get(++i);
                    end += e.len;
                    if (e.getTag() == 3) {
                        break;
                    }
                }
                result.add(line.substring(begin, end));
                begin = end;
                break;
            default:
                break;
        }
    }
    return result;
}
 
Example 11
Source File: KeyWordComputer.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Computes keywords for an article given its title and body.
 *
 * Blank arguments are replaced by the empty string. Title and content are then
 * joined with a tab character and handed to the (String, int) overload together
 * with the title length.
 *
 * @param title   the article title
 * @param content the article body
 * @return the result of the (String, int) overload for the joined text
 */
public List<Keyword> computeArticleTfidf(String title, String content) {
    final String safeTitle = StringUtil.isBlank(title) ? "" : title;
    final String safeContent = StringUtil.isBlank(content) ? "" : content;
    final String article = safeTitle + "\t" + safeContent;
    return computeArticleTfidf(article, safeTitle.length());
}
 
Example 12
Source File: SynonymsLibrary.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the synonym dictionary referenced by kv.
 *
 * Each non-blank line of the file is split on tab characters; the non-blank words of
 * a line form one synonym group, and every word of the group is registered in the
 * forest with the whole group as its value.
 *
 * @param key    key under which the library is registered in SYNONYMS; removed from
 *               SYNONYMS when loading fails
 * @param kv     holds the dictionary path (getK) and the cached forest (getV)
 * @param reload whether to refresh the dictionary: when true an already loaded forest
 *               is cleared and filled again, when false it is returned as is
 * @return the loaded forest, or null if loading failed
 */
private static synchronized SmartForest<List<String>> init(String key, KV<String, SmartForest<List<String>>> kv,
                boolean reload) {

    SmartForest<List<String>> forest = kv.getV();

    if (forest != null) {
        if (reload) {
            forest.clear();
        } else {
            return forest;
        }
    } else {
        forest = new SmartForest<>();
    }

    LOG.debug("begin init synonyms " + kv.getK());
    long start = System.currentTimeMillis();

    try (BufferedReader reader = IOUtil.getReader(PathToStream.stream(kv.getK()), IOUtil.UTF8)) {
        String temp = null;
        while ((temp = reader.readLine()) != null) {
            if (StringUtil.isBlank(temp)) {
                continue;
            }

            // Keep only the non-blank columns of the line.
            List<String> list = new ArrayList<>();
            for (String word : temp.split("\t")) {
                if (!StringUtil.isBlank(word)) {
                    list.add(word);
                }
            }

            // A group needs at least two real words; count the filtered list so that
            // blank columns do not make a single word look like a group.
            if (list.size() <= 1) {
                LOG.warn(temp + " in synonymsLibrary not in to library !");
                continue;
            }

            // Register only the filtered words, so blank columns never become keys.
            for (String word : list) {
                forest.add(word, list);
            }
        }
        kv.setV(forest);
        LOG.info("load synonyms use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
        return forest;
    } catch (Exception e) {
        LOG.error("Init synonyms library error :" + e.getMessage() + ", path: " + kv.getK());
        SYNONYMS.remove(key);
        return null;
    }
}