gnu.trove.map.hash.TIntFloatHashMap Java Examples

The following examples show how to use gnu.trove.map.hash.TIntFloatHashMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FuzzySetSimJoin.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Finds the matches for a single query set.
 * <p>
 * The query runs through four stages in order: signature generation, candidate
 * selection with the check filter, the nearest-neighbor filter, and verification.
 *
 * @param querySet     the set to look up
 * @param collection   the collection that is searched
 * @param simThreshold similarity threshold forwarded to every stage
 * @param idx          index forwarded to the signature and check-filter stages
 * @return the map produced by {@code verifyCandidates} for the surviving candidates
 */
private TIntFloatHashMap search(int[][] querySet, int[][][] collection, float simThreshold,
        TIntObjectMap<TIntList>[] idx) {

    // Stage 1: signature generation.
    final TIntSet[] signature = computeUnflattenedSignature(querySet, simThreshold, idx);

    // Stage 2: candidate selection and check filter.
    final TIntObjectMap<TIntFloatMap> afterCheckFilter =
            applyCheckFilter(querySet, collection, signature, idx, simThreshold);

    // Stage 3: nearest-neighbor filter.
    final TIntSet afterNNFilter =
            applyNNFilter(querySet, collection, afterCheckFilter, simThreshold);

    // Stage 4: verification; its result is the answer.
    return verifyCandidates(querySet, collection, afterNNFilter, simThreshold);
}
 
Example #2
Source File: FuzzySetSimJoin.java    From JedAIToolkit with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the join between two already transformed and indexed collections.
 * <p>
 * An inverted index is built over {@code collection2}; every set of
 * {@code collection1} is then searched against it.
 *
 * @param collection1  the probing collection; each of its sets is used as a query
 * @param collection2  the indexed collection that is searched
 * @param simThreshold similarity threshold passed to {@code search}
 * @return a map whose keys have the form {@code "i_j"}, where {@code i} is the position
 *         of the query set in {@code collection1} and {@code j} is a key reported by
 *         {@code search}; the value is the float {@code search} associated with {@code j}
 */
HashMap<String, Float> join(int[][][] collection1, int[][][] collection2, float simThreshold) {
    // Build the index over the second collection.
    final TIntObjectMap<TIntList>[] idx = buildSetInvertedIndex(collection2, tokenDict.size());

    final HashMap<String, Float> matchingPairs = new HashMap<>();

    // Probe the index with every set of the first collection.
    for (int i = 0; i < collection1.length; i++) {
        final TIntFloatHashMap matches = search(collection1[i], collection2, simThreshold, idx);
        final int[] matchedKeys = matches.keys();
        for (int k = 0; k < matchedKeys.length; k++) {
            final int j = matchedKeys[k];
            matchingPairs.put(i + "_" + j, matches.get(j));
        }
    }

    return matchingPairs;
}
 
Example #3
Source File: KMeansWordCluster.java    From fnlp with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Computes a distance value between {@code sv} and the center of class {@code n}.
 * <p>
 * Starts from {@code baseDistance / count^2} and then corrects it for every key
 * present in {@code sv}. Each stored center value is divided by the class count
 * before use (presumably the center holds un-normalised sums; verify against the
 * code that maintains {@code classCenter}).
 *
 * @param n            class index used for {@code classCenter} and {@code classCount}
 * @param sv           the sparse vector being compared
 * @param baseDistance precomputed base term for the class
 * @return the accumulated distance value
 */
private float distanceEuclidean(int n, HashSparseVector sv, float baseDistance) {
    final TIntFloatHashMap centerData = classCenter.get(n).data;
    final int members = classCount.get(n);

    float distance = baseDistance / (members * members);
    for (TIntFloatIterator entry = sv.data.iterator(); entry.hasNext();) {
        entry.advance();
        final int key = entry.key();
        final float value = entry.value();
        if (centerData.containsKey(key)) {
            // Replace the center-only contribution of this key by the real difference.
            final float scaled = centerData.get(key) / members;
            distance -= scaled * scaled;
            distance += (value - scaled) * (value - scaled);
        } else {
            // Key absent from the center: only the vector contributes.
            distance += value * value;
        }
    }
    return distance;
}
 
Example #4
Source File: KMeansWordCluster.java    From fnlp with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Updates the stored base distance of class {@code classid} using {@code vector}.
 * <p>
 * Reads the current value from {@code baseDistList}, adjusts it once per key of
 * {@code vector}, and writes the result back to the same slot.
 *
 * @param classid index of the class whose base distance is updated
 * @param vector  sparse vector whose entries drive the adjustment
 */
private void updateBaseDist(int classid, HashSparseVector vector) {
    float updated = baseDistList.get(classid);
    final TIntFloatHashMap centerData = classCenter.get(classid).data;

    for (TIntFloatIterator entry = vector.data.iterator(); entry.hasNext();) {
        entry.advance();
        final int key = entry.key();
        final float value = entry.value();
        if (centerData.containsKey(key)) {
            // Swap the squared center value for the squared difference.
            final float centerValue = centerData.get(key);
            updated -= centerValue * centerValue;
            updated += (value - centerValue) * (value - centerValue);
        } else {
            // Key not in the center: add the squared vector value.
            updated += value * value;
        }
    }

    baseDistList.set(classid, updated);
}
 
Example #5
Source File: SemSigUtilsTest.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Checks that {@code SemSigUtils.getSortedIndices} returns the keys of a small map
 * in the order 1, 2, 3, 0 (values 10, 5, 2, 1).
 */
@Test
public void testGetSortedIndices()
{
    final TIntFloatMap values = new TIntFloatHashMap();
    values.put(0, 1f);
    values.put(1, 10f);
    values.put(2, 5f);
    values.put(3, 2f);

    final int[] expectedOrder = {1, 2, 3, 0};
    final int[] sorted = SemSigUtils.getSortedIndices(values);

    // Length first, so a short result fails here instead of with an index error.
    assertEquals(expectedOrder.length, sorted.length);
    for (int rank = 0; rank < expectedOrder.length; rank++)
    {
        assertEquals(expectedOrder[rank], sorted[rank]);
    }
}
 
Example #6
Source File: WordCluster.java    From fnlp with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Looks up the weight stored for the unordered pair ({@code c1}, {@code c2}).
 * <p>
 * The smaller id selects the row in {@code wcc} and the larger id is the key
 * inside that row.
 *
 * @param c1 first id
 * @param c2 second id
 * @return {@code 0} when no row exists for the smaller id; otherwise whatever the
 *         row's {@code get} returns for the larger id
 */
private float getweight(int c1, int c2) {
	final int lower = Math.min(c1, c2);
	final int upper = Math.max(c1, c2);

	final TIntFloatHashMap row = wcc.get(lower);
	if (row == null) {
		return 0;
	}
	return row.get(upper);
}
 
Example #7
Source File: SemSigUtils.java    From ADW with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns a copy of {@code vector} in which every value is divided by the sum of
 * all values, so that the values sum to 1.0.
 * <p>
 * The input map is not modified. No guard is applied to the sum: if it is zero,
 * the float division yields NaN or infinite values.
 *
 * @param vector the map to normalize
 * @return a new map with the same keys and the scaled values
 */
public static TIntFloatMap normalizeVector(TIntFloatMap vector)
{
	float sum = 0;
	for (TFloatIterator values = vector.valueCollection().iterator(); values.hasNext();)
	{
		sum += values.next();
	}

	final TIntFloatMap result = new TIntFloatHashMap(vector.size());
	for (TIntFloatIterator entry = vector.iterator(); entry.hasNext();)
	{
		entry.advance();
		result.put(entry.key(), entry.value() / sum);
	}
	return result;
}
 
Example #8
Source File: MyArrays.java    From fnlp with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Splits the keys of a sparse vector into a leading group and a remainder, based on
 * the cumulative share of squared values ("energy").
 * <p>
 * Keys are taken in the order returned by {@code sort(data)}. The cumulative share
 * {@code value^2 / total} is accumulated key by key; the split point is the first
 * position at which that cumulative share exceeds {@code thres}.
 *
 * @param data  sparse vector
 * @param thres threshold on the cumulative energy share
 * @return a two-row array: row 0 holds the keys before the split point, row 1 holds
 *         the keys from the split point onward
 */
public static int[][] getTop(TIntFloatHashMap data, float thres) {
	final int[] order = sort(data);
	final int n = order.length;

	// Squared value per key; the total is summed from the last position to the first.
	final float[] energy = new float[n];
	float total = 0;
	for (int k = n - 1; k >= 0; k--) {
		energy[k] = (float) Math.pow(data.get(order[k]), 2);
		total += energy[k];
	}

	// Advance until the cumulative share goes above the threshold.
	int cut = 0;
	float ratio = 0;
	while (cut < n) {
		ratio += energy[cut] / total;
		if (ratio > thres) {
			break;
		}
		cut++;
	}

	return new int[][] {
		Arrays.copyOfRange(order, 0, cut),
		Arrays.copyOfRange(order, cut, n)
	};
}
 
Example #9
Source File: MyCollection.java    From fnlp with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Orders the keys of a sparse map by the absolute value of their entries.
 * <p>
 * The absolute values are copied into a {@code HashMap} and handed to the
 * {@code sort(Map)} overload, which decides the ordering (descending, according to
 * the original comment; confirm against that overload).
 *
 * @param tmap the sparse map whose keys are ordered
 * @return the keys, in the order of the list returned by {@code sort(Map)}
 */
public static int[] sort(TIntFloatHashMap tmap) {
	final HashMap<Integer, Float> magnitudes = new HashMap<Integer, Float>();
	for (TIntFloatIterator entry = tmap.iterator(); entry.hasNext();) {
		entry.advance();
		magnitudes.put(entry.key(), Math.abs(entry.value()));
	}

	final List<Entry> ranked = sort(magnitudes);
	final int[] idx = new int[ranked.size()];
	int pos = 0;
	for (Entry ranking : ranked) {
		idx[pos++] = (Integer) ranking.getKey();
	}
	return idx;
}
 
Example #10
Source File: MyHashSparseArrays.java    From fnlp with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Splits the keys of a sparse vector into a leading group and a remainder, based on
 * the cumulative share of squared values ("energy").
 * <p>
 * Keys are taken in the order returned by {@code sort(data)}. The cumulative share
 * {@code value^2 / total} is accumulated key by key; the split point is the first
 * position at which that cumulative share exceeds {@code thres}.
 *
 * @param data  sparse vector
 * @param thres threshold on the cumulative energy share
 * @return a two-row array: row 0 holds the keys before the split point, row 1 holds
 *         the keys from the split point onward
 */
public static int[][] getTop(TIntFloatHashMap data, float thres) {
	final int[] order = sort(data);
	final int n = order.length;

	// Squared value per key; the total is summed from the last position to the first.
	final float[] energy = new float[n];
	float total = 0;
	for (int k = n - 1; k >= 0; k--) {
		energy[k] = (float) Math.pow(data.get(order[k]), 2);
		total += energy[k];
	}

	// Advance until the cumulative share goes above the threshold.
	int cut = 0;
	float ratio = 0;
	while (cut < n) {
		ratio += energy[cut] / total;
		if (ratio > thres) {
			break;
		}
		cut++;
	}

	return new int[][] {
		Arrays.copyOfRange(order, 0, cut),
		Arrays.copyOfRange(order, cut, n)
	};
}
 
Example #11
Source File: SemSigUtilsTest.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Checks {@code SemSigUtils.truncateVector(m, false, 2, true)} on a four-entry map:
 * two entries must remain (keys 1 and 2), with values 10/15 and 5/15.
 */
@Test
public void testTruncateVectorNormalized()
{
    final float tolerance = 0.1f;

    final TIntFloatMap values = new TIntFloatHashMap();
    values.put(0, 1f);
    values.put(1, 10f);
    values.put(2, 5f);
    values.put(3, 2f);

    final TIntFloatMap result = SemSigUtils.truncateVector(values, false, 2, true);

    assertEquals(2, result.size());
    assertEquals(10f / 15f, result.get(1), tolerance);
    assertEquals(5f / 15f, result.get(2), tolerance);
}
 
Example #12
Source File: SemSigUtilsTest.java    From ADW with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Checks {@code SemSigUtils.truncateVector(m, false, 2, false)} on a four-entry map:
 * two entries must remain (keys 1 and 2), with their original values 10 and 5.
 */
@Test
public void testTruncateVector()
{
    final float tolerance = 0.1f;

    final TIntFloatMap values = new TIntFloatHashMap();
    values.put(0, 1f);
    values.put(1, 10f);
    values.put(2, 5f);
    values.put(3, 2f);

    final TIntFloatMap result = SemSigUtils.truncateVector(values, false, 2, false);

    assertEquals(2, result.size());
    assertEquals(10f, result.get(1), tolerance);
    assertEquals(5f, result.get(2), tolerance);
}
 
Example #13
Source File: WordCluster.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Looks up the value stored in {@code pcc} for the ordered pair ({@code c1}, {@code c2}).
 *
 * @param c1 key of the row in {@code pcc}
 * @param c2 key inside that row
 * @return {@code 0f} when {@code pcc} has no row for {@code c1}; otherwise whatever
 *         the row's {@code get} returns for {@code c2}
 */
private float getProb(int c1, int c2) {
	TIntFloatHashMap row = pcc.get(c1);
	if (row == null) {
		return 0f;
	}
	// Reuse the row fetched above instead of looking it up in pcc a second time.
	return row.get(c2);
}
 
Example #14
Source File: WordCluster.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Converts the stored counts to probabilities in a single pass, to save time.
 * <p>
 * Every value in {@code wordProb} is divided by {@code totalword} in place, and a
 * {@code Cluster} is registered in {@code clusters} for each non-negative key.
 * Every value in the nested maps of {@code pcc} is then divided by
 * {@code totalword} in place as well.
 */
private void statisticProb() {
	System.out.println("统计概率");

	for (TIntFloatIterator word = wordProb.iterator(); word.hasNext();) {
		word.advance();
		final float prob = word.value() / totalword;
		word.setValue(prob);
		final int id = word.key();
		// Negative keys are rescaled but get no cluster.
		if (id >= 0) {
			Cluster cluster = new Cluster(id, prob, alpahbet.lookupString(id));
			clusters.put(id, cluster);
		}
	}

	for (TIntObjectIterator<TIntFloatHashMap> row = pcc.iterator(); row.hasNext();) {
		row.advance();
		for (TIntFloatIterator cell = row.value().iterator(); cell.hasNext();) {
			cell.advance();
			cell.setValue(cell.value() / totalword);
		}
	}
}
 
Example #15
Source File: MyArrays.java    From fnlp with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Removes the low-energy entries from a sparse vector.
 * <p>
 * The keys are split with {@code getTop(data, v)}; the second group is passed to
 * {@code setZero} and the first group is returned.
 *
 * @param data sparse vector, modified in place
 * @param v    threshold forwarded to {@code getTop}
 * @return the first group of keys returned by {@code getTop}
 */
public static int[] trim(TIntFloatHashMap data, float v) {
	final int[][] groups = getTop(data, v);
	final int[] kept = groups[0];
	final int[] dropped = groups[1];
	setZero(data, dropped);
	return kept;
}
 
Example #16
Source File: MyHashSparseArrays.java    From fnlp with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Removes the low-energy entries from a sparse vector.
 * <p>
 * The keys are split with {@code getTop(data, v)}; the second group is passed to
 * {@code setZero} and the first group is returned.
 *
 * @param data sparse vector, modified in place
 * @param v    threshold forwarded to {@code getTop}
 * @return the first group of keys returned by {@code getTop}
 */
public static int[] trim(TIntFloatHashMap data, float v) {
	final int[][] groups = getTop(data, v);
	final int[] kept = groups[0];
	final int[] dropped = groups[1];
	setZero(data, dropped);
	return kept;
}
 
Example #17
Source File: SemSigProcess.java    From ADW with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads a semantic signature from a compressed, tab-separated file.
 * <p>
 * Assumes that the SemSigs are already sorted and normalized. Each line holds an
 * integer id, optionally followed by a tab and a probability. A line with only an id
 * reuses the probability of the previous line that carried one. Lines starting with
 * {@code "!!"} are skipped and do not count towards {@code size}.
 * <p>
 * Any exception raised while reading or parsing is printed and reading stops; the
 * entries read so far are kept. The reader is closed in all cases.
 *
 * @param path             path of the file to read; also used to derive the offset
 * @param size             maximum number of entries to read; {@code 0} or any value
 *                         above {@code MAX_VECTOR_SIZE} means {@code MAX_VECTOR_SIZE}
 * @param warnings         whether to log a message when the file does not exist
 * @param normalizationLKB not used by this method
 * @return the signature with its offset set; when the file does not exist, the vector
 *         is not set on it
 */
public SemSig getCustomSemSigFromCompressed(String path, int size, boolean warnings, LKB normalizationLKB)
{
	if(size == 0 || size > MAX_VECTOR_SIZE) 
		size = MAX_VECTOR_SIZE;
	
	SemSig vector = new SemSig();
	String offset = GeneralUtils.getOffsetFromPath(path);
	vector.setOffset(offset);
	
	TIntFloatMap map = new TIntFloatHashMap(size); 
	
	if(!new File(path).exists())
	{
		if (warnings)
			log.info("[WARNING: "+path+ " does not exist]");
		
		return vector;
	}
	
	// try-with-resources closes the reader even when a line fails to parse.
	try (BufferedReader br = new BufferedReader(new FileReader(path)))
	{
		float prob;
		float lastProb = 0.0f;
		int lineCounter = 1;
		String line;
		
		// readLine() returning null is the end-of-file signal; ready() only says
		// whether a read would block.
		while((line = br.readLine()) != null)
		{
			if(line.startsWith("!!")) continue;
			
			String[] lineSplit = line.split("\t");
			
			//keeping the IDs
			int off = Integer.parseInt(lineSplit[0]);
			
			if(lineSplit.length == 1)
			{
				// No probability on this line: reuse the last one seen.
				prob = lastProb;
			}
			else
			{
				prob = Float.parseFloat(lineSplit[1]);
				lastProb = prob;
			}
			
			map.put(off, prob);
			
			if(lineCounter++ >= size)
				break;
		}
	}
	catch(Exception e)
	{
		e.printStackTrace();
	}

	if(size != MAX_VECTOR_SIZE)
		map = SemSigUtils.truncateVector(map, true, size, true);

	vector.setVector(map);
	
	return vector;
}
 
Example #18
Source File: WeightedOverlapTest.java    From ADW with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Exercises {@code WeightedOverlap.compare} on small hand-built maps: identical maps
 * must score 1, a map with the same keys but different weights about 0.725, and a map
 * with disjoint keys 0.
 */
@Test
public void testGetSortedIndices() 
{
	final WeightedOverlap overlap = new WeightedOverlap();

	// Keys 1..6 with weights equal to the key.
	final TIntFloatMap base = new TIntFloatHashMap();
	for (int key = 1; key <= 6; key++)
	{
		base.put(key, (float) key);
	}

	// Exact copy of the base map.
	final TIntFloatMap copy = new TIntFloatHashMap();
	copy.putAll(base);

	// Shares keys 4..6 with the base map and adds 7..9.
	// NOTE(review): this map is populated but never compared below, and score2
	// repeats the base-vs-copy comparison; possibly a leftover. Confirm the intent.
	final TIntFloatMap partial = new TIntFloatHashMap();
	partial.put(4, 4f);
	partial.put(5, 5f);
	partial.put(6, 6f);
	partial.put(7, 1f);
	partial.put(8, 2f);
	partial.put(9, 3f);

	// Same keys as the base map, different weights.
	final TIntFloatMap reweighted = new TIntFloatHashMap();
	reweighted.put(1, 6f);
	reweighted.put(4, 5f);
	reweighted.put(2, 4f);
	reweighted.put(5, 3f);
	reweighted.put(3, 2f);
	reweighted.put(6, 1f);

	// No key in common with the base map.
	final TIntFloatMap disjoint = new TIntFloatHashMap();
	disjoint.put(7, 6f);
	disjoint.put(8, 5f);

	final double score1 = overlap.compare(base, copy, true);
	final double score2 = overlap.compare(base, copy, true);
	final double score3 = overlap.compare(base, reweighted, true);
	final double score4 = overlap.compare(base, disjoint, true);

	assertEquals(1, score1, 0.01);
	assertEquals(1, score2, 0.01);
	assertEquals(0.725, score3, 0.01);
	assertEquals(0, score4, 0.0001);
}
 
Example #19
Source File: HashSparseVector.java    From fnlp with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Copy constructor: initialises {@code data} with a new {@code TIntFloatHashMap}
 * built from the data of {@code v}.
 *
 * @param v the vector to copy
 */
public HashSparseVector(HashSparseVector v) {
	this.data = new TIntFloatHashMap(v.data);
}
 
Example #20
Source File: MyHashSparseArrays.java    From fnlp with GNU Lesser General Public License v3.0 3 votes vote down vote up
/**
 * Zeroes the elements at the given indices.
 * <p>
 * In this sparse representation that is done by removing the entry: every key in
 * {@code idx} that is present in {@code data} is removed from it.
 *
 * @param data the sparse vector, modified in place
 * @param idx  the keys to clear
 */
public static void setZero(TIntFloatHashMap data, int[] idx) {
	for (int key : idx) {
		if (data.containsKey(key)) {
			data.remove(key);
		}
	}
}
 
Example #21
Source File: MyArrays.java    From fnlp with GNU Lesser General Public License v3.0 3 votes vote down vote up
/**
 * Zeroes the elements at the given indices.
 * <p>
 * In this sparse representation that is done by removing the entry: every key in
 * {@code idx} that is present in {@code data} is removed from it.
 *
 * @param data the sparse vector, modified in place
 * @param idx  the keys to clear
 */
public static void setZero(TIntFloatHashMap data, int[] idx) {
	for (int key : idx) {
		if (data.containsKey(key)) {
			data.remove(key);
		}
	}
}
 
Example #22
Source File: MyHashSparseArrays.java    From fnlp with GNU Lesser General Public License v3.0 votes vote down vote up
/**
 * Returns the keys of a sparse vector in sorted order.
 * <p>
 * Delegates to {@code MyCollection.sort(data)}; presumably the order is by absolute
 * value, largest first, as the original comment states (confirm against
 * {@code MyCollection.sort}).
 * 
 * @param data
 *            the sparse vector to sort
 * @return the original keys, in sorted order
 */
public static int[] sort(TIntFloatHashMap data) {
	

	return MyCollection.sort(data);
}
 
Example #23
Source File: MyArrays.java    From fnlp with GNU Lesser General Public License v3.0 votes vote down vote up
/**
 * Returns the keys of a sparse vector in sorted order.
 * <p>
 * Delegates to {@code MyCollection.sort(data)}; presumably the order is by absolute
 * value, largest first, as the original comment states (confirm against
 * {@code MyCollection.sort}).
 * 
 * @param data
 *            the sparse vector to sort
 * @return the original keys, in sorted order
 */
public static int[] sort(TIntFloatHashMap data) {
	

	return MyCollection.sort(data);
}