Java Code Examples for com.github.mgunlogson.cuckoofilter4j.CuckooFilter

The following examples show how to use com.github.mgunlogson.cuckoofilter4j.CuckooFilter. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Inserts 100000 distinct integers into a filter built for 130000 keys and
 * verifies that every inserted value is still reported as possibly present.
 */
@Test
public void sanityFalseNegative() {
	final int insertions = 100000;
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	// populate; a false return from put() would mean the filter filled up,
	// which must not happen at this size
	int key = 0;
	while (key < insertions) {
		assertTrue(filter.put(key));
		key++;
	}

	// count inserted keys that the filter no longer recognises
	int missing = 0;
	for (int probe = 0; probe < insertions; probe++) {
		missing += filter.mightContain(probe) ? 0 : 1;
	}
	assertTrue(missing + " false negatives detected", missing == 0);
}
 
Example 2
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Fills the filter with unique integers, then deletes each of them and
 * checks that no deletion of a previously inserted value is refused.
 */
@Test
public void sanityFailedDelete() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	final int insertedCount = 100000;
	// all values are unique; put() returning false would mean the filter is
	// full, which must not happen here
	for (int value = 0; value != insertedCount; value++) {
		assertTrue(filter.put(value));
	}

	// tally deletions that were refused for values that are definitely there
	int refused = 0;
	for (int value = 0; value != insertedCount; value++) {
		boolean removed = filter.delete(value);
		if (!removed) {
			refused++;
		}
	}
	assertTrue(refused + " false deletions detected", refused == 0);
}
 
Example 3
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Inserts 100000 integers, then tries to delete 10000 values that were never
 * inserted. The share of such deletes that succeed must stay below 0.02 for
 * a filter configured with a 0.01 false positive rate.
 */
@Test
public void sanityFalseDeleteRate() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	final int inserted = 100000;
	for (int value = 0; value < inserted; value++) {
		// put() returns false when the filter is full, which must not happen
		assertTrue(filter.put(value));
	}

	// probe a range that starts just past the inserted values
	final int totalAttempts = 10000;
	final int firstProbe = inserted + 1;
	final int endProbe = firstProbe + totalAttempts;

	// a successful delete here removed something that was never added
	int falseDeletes = 0;
	for (int probe = firstProbe; probe < endProbe; probe++) {
		if (filter.delete(probe)) {
			falseDeletes++;
		}
	}

	// the false delete rate should roughly track the false positive rate
	double rate = (double) falseDeletes / totalAttempts;
	String message = falseDeletes
			+ " false deletions detected. False delete rate is above 0.02 on filter with 0.01 false positive rate";
	assertTrue(message, rate < 0.02);
}
 
Example 4
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Inserts 100000 integers, then queries 100000 values that were never
 * inserted and checks that the observed false positive rate is below 0.02.
 */
@Test
public void sanityFalsePositiveRate() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	final int inserted = 100000;
	for (int value = 0; value < inserted; value++) {
		// unique values; put() returns false only when the filter is full
		assertTrue(filter.put(value));
	}

	// probe a range that starts just past the inserted values
	final int totalAttempts = 100000;
	final int firstProbe = inserted + 1;
	final int endProbe = firstProbe + totalAttempts;

	// every hit in this range is something the filter was never given
	int falsePositives = 0;
	for (int probe = firstProbe; probe < endProbe; probe++) {
		if (filter.mightContain(probe)) {
			falsePositives++;
		}
	}

	double rate = (double) falsePositives / totalAttempts;
	assertTrue(rate + " false positive rate is above limit", rate < 0.02);
}
 
Example 5
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Inserts the same key nine times, then removes it nine times, checking the
 * item count and membership along the way.
 */
@Test
public void sanityTestVictimCache() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	final int copies = 9;
	int added = 0;
	while (added < copies) {
		assertTrue(filter.put(42));
		added++;
	}
	// per the original author's note, both buckets for 42 are full at this
	// point, so the victim cache is in use
	assertTrue(filter.getCount() == 9);

	// each copy must be visible and removable exactly once
	for (int removed = 0; removed < copies; removed++) {
		assertTrue(filter.mightContain(42));
		assertTrue(filter.delete(42));
	}

	// nothing is left: further deletes fail and the key is no longer reported
	assertFalse(filter.delete(42));
	assertFalse(filter.mightContain(42));
	assertTrue(filter.getCount() == 0);
}
 
Example 6
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the equals contract with seven filters that must all be unequal to
 * each other: a partly filled one, a completely filled one, an empty one, and
 * four empty ones that each differ in a single construction parameter
 * (funnel, capacity, false positive rate, hash algorithm).
 */
@Test
public void testEquals() {
	CuckooFilter<Integer> halfLoaded = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Integer> saturated = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();

	int key = 0;
	while (key < 1000000) {
		assertTrue(halfLoaded.put(key));
		key++;
	}
	// keep inserting until the first rejected put()
	int next = 0;
	while (saturated.put(next)) {
		next++;
	}

	// empty filters, each varying one parameter relative to the baseline
	CuckooFilter<Integer> baseline = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Long> otherFunnel = new CuckooFilter.Builder<>(Funnels.longFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Integer> otherCapacity = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 1000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Integer> otherFpRate = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.03).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Integer> otherHash = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_128).build();

	// one filter per equality group, so every pair is expected to be unequal
	new EqualsTester()
			.addEqualityGroup(halfLoaded)
			.addEqualityGroup(saturated)
			.addEqualityGroup(baseline)
			.addEqualityGroup(otherFunnel)
			.addEqualityGroup(otherCapacity)
			.addEqualityGroup(otherFpRate)
			.addEqualityGroup(otherHash)
			.testEquals();
}
 
Example 7
Source Project: bidder   Source File: Cuckoo.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads a file or S3 object line by line and loads the filter.
 * <p>
 * The first line is trimmed and used both as the first entry and to size the
 * filter: the capacity is {@code sz / (length of the first key - 5)}. For
 * every line only the first field returned by {@code eatquotedStrings}, with
 * double quotes removed, is stored. The reader is closed on every exit path,
 * including failures.
 *
 * @param br BufferedReader. The line-by-line reader; closed by this method.
 * @param sz long. Divided by the adjusted length of the first key to size the
 *           filter (presumably the total size of the input - TODO confirm
 *           against callers).
 * @throws IllegalArgumentException if the input has no lines, or the first
 *             key is too short to derive a capacity from.
 * @throws Exception on I/O errors.
 */
void makeFilter(BufferedReader br, long sz) throws Exception {
	// try-with-resources closes the reader even when reading or parsing fails
	try (BufferedReader reader = br) {
		String line = reader.readLine();
		if (line == null) {
			throw new IllegalArgumentException("Cannot build cuckoo filter: input has no lines");
		}

		String[] parts = eatquotedStrings(line.trim());
		String key = parts[0].replace("\"", "");

		// NOTE(review): the meaning of the constant 5 is not visible here;
		// it is kept exactly as in the original sizing formula.
		long size = key.length() - 5;
		if (size <= 0) {
			// would otherwise divide by zero or request a negative capacity
			throw new IllegalArgumentException(
					"Cannot size cuckoo filter: first key has only " + key.length() + " characters");
		}
		size = sz / size;
		cuckooFilter = new CuckooFilter.Builder<>(Funnels.stringFunnel(Charset.forName("UTF-8")), size).build();
		cuckooFilter.put(key);

		while ((line = reader.readLine()) != null) {
			parts = eatquotedStrings(line);
			cuckooFilter.put(parts[0].replace("\"", ""));
		}
	}
}
 
Example 8
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds one filter per hash algorithm (Murmur3_32, sipHash24, Murmur3_128,
 * sha256) and passes if every build completes without throwing.
 */
@Test
public void testCreateDifferentHashLengths() {
	Algorithm[] algorithms = { Algorithm.Murmur3_32, Algorithm.sipHash24, Algorithm.Murmur3_128,
			Algorithm.sha256 };
	for (Algorithm algorithm : algorithms) {
		new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000).withHashAlgorithm(algorithm).build();
	}
}
 
Example 9
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tracks {@code approximateCount} for a single key while it is inserted up to
 * the point of rejection and then deleted again one copy at a time.
 */
@Test
public void sanityApproimateCount() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	// eight duplicates fill the buckets; the count follows each insert
	for (int expected = 1; expected <= 8; expected++) {
		assertTrue(filter.put(42));
		assertTrue(filter.approximateCount(42) == expected);
	}

	// the ninth copy should land in the victim slot
	assertTrue(filter.put(42));
	assertTrue(filter.approximateCount(42) == 9);

	// the tenth insert must be rejected and leave the count untouched
	assertFalse(filter.put(42));
	assertTrue(filter.approximateCount(42) == 9);

	// two deletes: should remove the victim and one bucket entry
	assertTrue(filter.delete(42));
	assertTrue(filter.delete(42));
	assertTrue(filter.approximateCount(42) == 7);

	// remove the remaining seven copies, checking the count after each
	for (int remaining = 6; remaining >= 0; remaining--) {
		assertTrue(filter.delete(42));
		assertTrue(filter.approximateCount(42) == remaining);
	}

	// nothing should be left
	assertFalse(filter.mightContain(42));
}
 
Example 10
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * For capacities from 100000 to 900000 keys, inserts unique integers until
 * the first rejected insert and then checks the reported count, the number
 * of accepted keys and the load factor at that point.
 */
@Test
public void sanityOverFillFilter() {
	for (int filterKeys = 100000; filterKeys < 1000000; filterKeys += 100000) {
		CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), filterKeys)
				.withFalsePositiveRate(0.01)
				.withHashAlgorithm(Algorithm.Murmur3_32)
				.build();

		// insert 0, 1, 2, ... until put() is refused; the counter ends up as
		// the number of accepted keys
		int countFailedAt = 0;
		while (filter.put(countFailedAt)) {
			countFailedAt++;
		}

		// the filter must report exactly as many items as were accepted
		assertTrue("Filter reports " + filter.getCount() + " when we actually added " + countFailedAt
				+ " items before failing", filter.getCount() == countFailedAt);

		// a somewhat larger filter than requested is fine, but never more
		// than twice as big plus 1 (numBuckets is rounded to a power of 2)
		assertTrue("We were able to add " + countFailedAt + " keys to a filter that was only made to hold "
				+ filterKeys, countFailedAt <= (filterKeys * 2) + 1);

		// tight bounds on purpose, to catch small anomalies; adjust them if
		// they turn out to fail too often
		assertTrue(
				"Load Factor only " + filter.getLoadFactor() + " for filter with " + filterKeys
						+ " capacity at first insertion failure. Expected more than 0.95",
				filter.getLoadFactor() > .95);
		assertTrue(
				"Load Factor " + filter.getLoadFactor() + " for filter with " + filterKeys
						+ " capacity at first insertion failure. Expected less than .995",
				filter.getLoadFactor() < 0.995);
	}
}
 
Example 11
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Inserts the same key repeatedly (at most 30 times) and checks that the
 * first rejected insert is the one after nine successful ones.
 */
@Test
public void sanityOverFillBucketMoreThan2B() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 100000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	final int maxTries = 30;
	// counts successful inserts; stops at the first refusal or at maxTries
	int failedAt = 0;
	while (failedAt < maxTries && filter.put(2)) {
		failedAt++;
	}
	assertTrue("Duplicate insert failed at " + failedAt + " Expected value is (2*BUCKET_SIZE)+victim cache = 9",
			failedAt == 9);
}
 
Example 12
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Plants a victim directly in the filter's fields and checks that
 * {@code checkIsVictim} matches it through either of its two bucket indexes
 * when the tag is the same.
 */
@Test
public void testVictimCacheTagComparison() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	// set the victim state by hand instead of overfilling the filter
	filter.hasVictim = true;
	filter.victim = new Victim(1, 2, 42);

	BucketAndTag viaFirstIndex = new BucketAndTag(filter.victim.getI1(), 42);
	BucketAndTag viaSecondIndex = new BucketAndTag(filter.victim.getI2(), 42);

	assertTrue(filter.checkIsVictim(viaFirstIndex));
	assertTrue(filter.checkIsVictim(viaSecondIndex));
}
 
Example 13
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code stressFillDrainCheck} three times in a row on filters of 19
 * different capacities, from 20000 to 380000 keys.
 */
@Test
public void sanityFillDeleteAllAndCheckABunchOfStuff() {
	final int rounds = 3;
	for (int filterKeys = 20000; filterKeys < 400000; filterKeys += 20000) {
		CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), filterKeys)
				.withFalsePositiveRate(0.01)
				.withHashAlgorithm(Algorithm.Murmur3_32)
				.build();
		// the same filter instance is filled and drained repeatedly
		int round = 0;
		while (round < rounds) {
			stressFillDrainCheck(filter);
			round++;
		}
	}
}
 
Example 14
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Copies an empty filter and checks that the copy is equal to the original
 * but is a different object.
 */
@Test
public void testCopyEmpty() {
	CuckooFilter<Integer> original = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	CuckooFilter<Integer> duplicate = original.copy();

	assertTrue(duplicate.equals(original));
	assertNotSame(original, duplicate);
}
 
Example 15
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Copies a filter holding 1000000 of its 2000000 configured keys and checks
 * that the copy is equal to the original but is a different object.
 */
@Test
public void testCopyPartFull() {
	CuckooFilter<Integer> original = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	int key = 0;
	while (key < 1000000) {
		assertTrue(original.put(key));
		key++;
	}

	CuckooFilter<Integer> duplicate = original.copy();

	assertTrue(duplicate.equals(original));
	assertNotSame(original, duplicate);
}
 
Example 16
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Copies a filter that has been filled until an insert was rejected and
 * checks that the copy is equal to the original but is a different object.
 * According to the original author, a totally full filter exercises the
 * victim cache as well.
 */
@Test
public void testCopyFull() {
	CuckooFilter<Integer> original = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	// insert 0, 1, 2, ... and stop at the first rejected put()
	int key = 0;
	while (original.put(key)) {
		key++;
	}

	CuckooFilter<Integer> duplicate = original.copy();

	assertTrue(duplicate.equals(original));
	assertNotSame(original, duplicate);
}
 
Example 17
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Inserts 1000000 integers into a filter built for 2000000 keys and hands it
 * to {@code SerializableTester.reserializeAndAssert}.
 */
@Test
public void testSerializePartFull() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	int key = 0;
	while (key < 1000000) {
		assertTrue(filter.put(key));
		key++;
	}

	SerializableTester.reserializeAndAssert(filter);
}
 
Example 18
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Fills a filter until an insert is rejected and hands it to
 * {@code SerializableTester.reserializeAndAssert}.
 */
@Test
public void testSerializeFull() {
	CuckooFilter<Integer> filter = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32)
			.build();

	// insert 0, 1, 2, ... and stop at the first rejected put()
	int key = 0;
	while (filter.put(key)) {
		key++;
	}

	SerializableTester.reserializeAndAssert(filter);
}
 
Example 19
Source Project: XRTB   Source File: Cuckoo.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads a file or S3 object line by line and loads the filter.
 * <p>
 * The first line is trimmed and used both as the first entry and to size the
 * filter: the capacity is {@code sz / (length of the first key - 5)}. For
 * every line only the first field returned by {@code eatquotedStrings}, with
 * double quotes removed, is stored. The reader is closed on every exit path,
 * including failures.
 *
 * @param br BufferedReader. The line-by-line reader; closed by this method.
 * @param sz long. Divided by the adjusted length of the first key to size the
 *           filter (presumably the total size of the input - TODO confirm
 *           against callers).
 * @throws IllegalArgumentException if the input has no lines, or the first
 *             key is too short to derive a capacity from.
 * @throws Exception on I/O errors.
 */
void makeFilter(BufferedReader br, long sz) throws Exception {
	// try-with-resources closes the reader even when reading or parsing fails
	try (BufferedReader reader = br) {
		String line = reader.readLine();
		if (line == null) {
			throw new IllegalArgumentException("Cannot build cuckoo filter: input has no lines");
		}

		String[] parts = eatquotedStrings(line.trim());
		String key = parts[0].replace("\"", "");

		// NOTE(review): the meaning of the constant 5 is not visible here;
		// it is kept exactly as in the original sizing formula.
		long size = key.length() - 5;
		if (size <= 0) {
			// would otherwise divide by zero or request a negative capacity
			throw new IllegalArgumentException(
					"Cannot size cuckoo filter: first key has only " + key.length() + " characters");
		}
		size = sz / size;
		cuckooFilter = new CuckooFilter.Builder<>(Funnels.stringFunnel(Charset.forName("UTF-8")), size).build();
		cuckooFilter.put(key);

		while ((line = reader.readLine()) != null) {
			parts = eatquotedStrings(line);
			cuckooFilter.put(parts[0].replace("\"", ""));
		}
	}
}
 
Example 20
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Configuring a false positive rate of 1 must be rejected with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsTooHighFp() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withFalsePositiveRate(1).build();
}
 
Example 21
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Requesting {@code Integer.MAX_VALUE} keys together with the Murmur3_32
 * hash must be rejected with an {@link IllegalArgumentException}; the test
 * name indicates the hash is too short for that capacity.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsShortHashFunction() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(),
			Integer.MAX_VALUE);
	builder.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
}
 
Example 22
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Configuring a false positive rate of 0.0 must be rejected with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsZeroFp() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withFalsePositiveRate(0.0).build();
}
 
Example 23
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * A negative number of expected keys must be rejected with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsNegItems() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), -2000000);
	builder.build();
}
 
Example 24
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Configuring a negative false positive rate must be rejected with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsNegFp() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withFalsePositiveRate(-0.02).build();
}
 
Example 25
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * An expected concurrency of 0 must be rejected with an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsZeroConcurrency() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withExpectedConcurrency(0).build();
}
 
Example 26
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * An expected concurrency of 10 must be rejected with an
 * {@link IllegalArgumentException}; the original author's note describes it
 * as "not multiple of 2 concurrency".
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsNotMult2Concurrency() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withExpectedConcurrency(10).build();
}
 
Example 27
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with an expected concurrency of 16 should simply succeed
 * without throwing.
 */
@Test
public void testConcurrencyWorks() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withExpectedConcurrency(16).build();
}
 
Example 28
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter without choosing a hash algorithm should simply succeed
 * without throwing.
 */
@Test
public void testNullHash() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.build();
}
 
Example 29
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Fills the given filter with consecutive integers until an insert is
 * rejected, verifies that all of them are reported as present, deletes them
 * in two passes and finally verifies that the filter is completely empty.
 *
 * @param filter the filter to fill and drain; it is mutated by this method
 */
private void stressFillDrainCheck(CuckooFilter<Integer> filter) {
	// insert 0, 1, 2, ... until the first rejected put(); the counter ends up
	// as the number of accepted values
	int inserted = 0;
	while (filter.put(inserted)) {
		inserted++;
	}

	// everything that was accepted has to be reported as present
	for (int key = 0; key < inserted; key++) {
		assertTrue("filter doesn't contain " + key + " with " + inserted + " total insertions",
				filter.mightContain(key));
	}

	// delete in two passes: the first one gets most items, the second one
	// picks up any that were deleted from the "wrong" bucket; a working
	// filter is mathematically (almost) guaranteed to end up empty
	int deleted = 0;
	for (int pass = 0; pass < 2; pass++) {
		for (int key = 0; key < inserted; key++) {
			if (filter.delete(key)) {
				deleted++;
			}
		}
	}

	// every insert was matched by exactly one successful delete
	assertTrue(inserted == deleted);
	// and the filter itself reports being empty
	assertTrue(filter.getCount() == 0);

	// nothing may be deletable any more
	for (int key = 0; key < inserted; key++) {
		assertFalse(filter.delete(key));
	}
	// an empty filter must not produce a single false positive
	for (int key = 0; key < inserted; key++) {
		assertFalse(filter.mightContain(key));
	}
}
 
Example 30
Source Project: CuckooFilter4J   Source File: TestCuckooFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs the {@code ClassSanityTester} null checks against
 * {@code CuckooFilter}, with fixed default values supplied for int, long and
 * double parameters.
 */
@Test
public void autoTestNulls() {
	// 15 was chosen for int so that it passes the argument checks
	ClassSanityTester tester = new ClassSanityTester()
			.setDefault(int.class, 15)
			.setDefault(long.class, 15L)
			.setDefault(double.class, 0.001);
	tester.testNulls(CuckooFilter.class);
}