Java Code Examples for it.unimi.dsi.fastutil.objects.Object2IntMap#size()

The following examples show how to use it.unimi.dsi.fastutil.objects.Object2IntMap#size(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: DL4JSequenceRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Inverts a tag-to-index map into an array in which each tag is stored at
 * the position given by its mapped int value.
 *
 * @param aTagsetCollector
 *            map from tag to array position; the array is sized to the
 *            number of entries in this map
 * @return array of tags addressed by their mapped index
 */
private String[] compileTagset(Object2IntMap<String> aTagsetCollector)
{
    final int tagCount = aTagsetCollector.size();
    String[] tagsByIndex = new String[tagCount];

    for (Entry<String> mapping : aTagsetCollector.object2IntEntrySet()) {
        // The int value of each entry is the slot the tag belongs in.
        final int slot = mapping.getIntValue();
        tagsByIndex[slot] = mapping.getKey();
    }

    return tagsByIndex;
}
 
Example 2
Source File: SmartDictionarySerializer.java    From mph-table with Apache License 2.0 5 votes vote down vote up
/**
 * Inverts a word-to-index dictionary into an array addressed by index.
 *
 * @param dict map from word to its position in the returned array
 * @return array with one slot per dictionary entry, each word stored at its mapped index
 * @throws IOException if an index is null, negative, or not below the number of
 *                     entries, or if two words map to the same index
 */
private String[] dictionaryToIndex(final Object2IntMap<String> dict) throws IOException {
    final int size = dict.size();
    final String[] wordsByIndex = new String[size];
    for (final Map.Entry<String, Integer> mapping : dict.entrySet()) {
        final String term = mapping.getKey();
        final Integer position = mapping.getValue();
        // Every index must address a slot of the result array.
        final boolean inRange = position != null && position >= 0 && position < wordsByIndex.length;
        if (!inRange) {
            throw new IOException("inconsistent dictionary, has " + wordsByIndex.length + " entries but an index of " + term + " -> " + position);
        }
        // A slot that is already filled means two words share the same index.
        final String previous = wordsByIndex[position];
        if (previous != null) {
            throw new IOException("inconsistent dictionary, both " + previous + " and " + term + " map to " + position);
        }
        wordsByIndex[position] = term;
    }
    return wordsByIndex;
}
 
Example 3
Source File: BestAnchors.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Selects an anchor for the article identified by {@code wid}.
 * <p>
 * The article title is looked up through {@code articles} by its WID field.
 * Anchors are then visited in descending order of their mapped count, skipping
 * those whose count is below {@code MIN_ANCHORS}. The first anchor that has at
 * least one term (as produced by {@code terms(...)}) not present among the
 * title's terms is returned.
 *
 * @param wid     identifier of the article to search for
 * @param anchors map from anchor text to its count
 * @return the selected anchor, or {@code null} when no article matches
 *         {@code wid} or no anchor qualifies
 * @throws IOException declared for the index lookup
 */
String findBest(int wid, final Object2IntMap<String> anchors) throws IOException
{
	Query q = new TermQuery(new Term(WikipediaIndexer.FIELD_WID, ""+wid));
	TopDocs td = articles.search(q, 1);
	// An unknown WID is reported as "no result" rather than as an error.
	if (td.totalHits == 0) {
		return null;
	}
	String title = articles.doc(td.scoreDocs[0].doc).get(WikipediaIndexer.FIELD_TITLE);
	// Literal substitution: no regular expression is needed here.
	title = title.replace("&quot;", "\"");

	Set<String> titleTerms = terms(title).keySet();

	List<String> bests = new ArrayList<String>(anchors.size());
	bests.addAll(anchors.keySet());
	Collections.sort(bests, new Comparator<String>() {
		@Override
		public int compare(String o1, String o2) {
			// Descending by count. Integer.compare avoids the overflow that
			// subtracting the two counts could produce.
			return Integer.compare(anchors.getInt(o2), anchors.getInt(o1));
		}
	});

	for (String a : bests)
	{
		if (anchors.getInt(a) < MIN_ANCHORS) {
			continue;
		}
		Set<String> anchorTerms = terms(a).keySet();
		for (String aw : anchorTerms) {
			// The anchor qualifies as soon as one of its terms is absent from the title.
			if (!titleTerms.contains(aw)) {
				return a;
			}
		}
	}
	return null;
}
 
Example 4
Source File: AllWIDs.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Collects every value of the title-to-WID dataset into an int set.
 *
 * @return set containing all values of the dataset, trimmed before being returned
 * @throws IOException declared by the overridden method
 */
@Override
protected IntSet parseSet() throws IOException {
	Object2IntMap<String> widByTitle = new TitlesToWIDMap(lang).getDataset();

	// The set is created with twice the number of titles as its expected size.
	final int expectedSize = widByTitle.size()*2;
	IntOpenHashSet allWids = new IntOpenHashSet(expectedSize);

	allWids.addAll(widByTitle.values());
	allWids.trim();
	return allWids;
}
 
Example 5
Source File: IgnoreWIDs.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the set of WIDs whose page title matches either the date pattern
 * or the ignore pattern configured for {@code lang}.
 * <p>
 * Progress is reported through a {@code PLogger} with three counters:
 * index 0 is updated for every title, index 1 for every date match and
 * index 2 for every ignore-pattern match. A title is tested against the
 * ignore pattern only when it did not match the date pattern.
 *
 * @return set of the WIDs mapped by the matching titles, trimmed before being returned
 * @throws IOException declared by the overridden method
 */
@Override
protected IntSet parseSet() throws IOException
{
	log.info("Loading data...");
	Object2IntMap<String> titles = new TitlesToWIDMap(lang).getDataset();
	IntOpenHashSet ids = new IntOpenHashSet(titles.size());
	
	Pattern p_date = WikiPatterns.getPattern(lang, Type.PAGE_DATE);
	Pattern p_other = WikiPatterns.getPattern(lang, Type.PAGE_IGNORE);
	
	PLogger plog = new PLogger(log,"titles","dates","others").setEnd(0, titles.size()).start("Parsing ignore-pages...");
	for(String title : titles.keySet())
	{
		plog.update(0);
		if (p_date.matcher(title).find()) {
			plog.update(1);
			// Primitive lookup: avoids boxing the value only to unbox it into the int set.
			ids.add(titles.getInt(title));
		}
		else if (p_other.matcher(title).find()) {
			plog.update(2);
			ids.add(titles.getInt(title));
		}
	}
	plog.stop();
	
	ids.trim();
	return ids;
}