com.github.mgunlogson.cuckoofilter4j.CuckooFilter Java Examples

The following examples show how to use com.github.mgunlogson.cuckoofilter4j.CuckooFilter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 6 votes vote down vote up
/**
 * Checks equals/hashCode across filters that differ in contents, key funnel,
 * capacity, false positive rate or hash algorithm. Each filter is placed in
 * its own equality group, so none of them may compare equal to another.
 */
@Test
public void testEquals() {
	// same configuration, loaded with one million sequential keys
	CuckooFilter<Integer> halfLoaded = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	int key = 0;
	while (key < 1000000) {
		assertTrue(halfLoaded.put(key));
		key++;
	}

	// same configuration, loaded until the first insertion is refused
	CuckooFilter<Integer> saturated = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	int next = 0;
	while (saturated.put(next)) {
		next++;
	}

	// empty filters, each varying exactly one construction parameter
	CuckooFilter<Integer> empty = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Long> longKeyed = new CuckooFilter.Builder<>(Funnels.longFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Integer> smaller = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 1000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Integer> looserFpp = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.03).withHashAlgorithm(Algorithm.Murmur3_32).build();
	CuckooFilter<Integer> widerHash = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000)
			.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_128).build();

	new EqualsTester()
			.addEqualityGroup(halfLoaded)
			.addEqualityGroup(saturated)
			.addEqualityGroup(empty)
			.addEqualityGroup(longKeyed)
			.addEqualityGroup(smaller)
			.addEqualityGroup(looserFpp)
			.addEqualityGroup(widerHash)
			.testEquals();
}
 
Example #2
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 6 votes vote down vote up
/**
 * Inserts the same key nine times and then removes it nine times, checking
 * the reported item count at both ends and that a tenth delete is refused.
 */
@Test
public void sanityTestVictimCache() {
	final int copies = 9;
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	int inserted = 0;
	while (inserted < copies) {
		assertTrue(filter.put(42));
		inserted++;
	}
	// per the original author's note, at this point both buckets for 42 are
	// full and the victim cache is in use
	assertTrue(filter.getCount() == 9);

	for (int removed = 0; removed < copies; removed++) {
		assertTrue(filter.mightContain(42));
		assertTrue(filter.delete(42));
	}

	// nothing left to delete or find
	assertFalse(filter.delete(42));
	assertFalse(filter.mightContain(42));
	assertTrue(filter.getCount() == 0);
}
 
Example #3
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 6 votes vote down vote up
/**
 * Loads 100000 unique keys, then probes 100000 keys that were never inserted
 * and requires the observed false positive rate to stay below 0.02.
 */
@Test
public void sanityFalsePositiveRate() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	final int inserted = 100000;
	for (int key = 0; key < inserted; key++) {
		// a refused insert would mean the filter filled up early
		assertTrue(filter.put(key));
	}

	// probe a range of keys that starts just past the inserted ones
	final int probes = 100000;
	final int firstProbe = inserted + 1;
	int hits = 0;
	for (int offset = 0; offset < probes; offset++) {
		if (filter.mightContain(firstProbe + offset)) {
			hits++;
		}
	}

	double observedRate = (double) hits / probes;
	assertTrue(observedRate + " false positive rate is above limit", observedRate < 0.02);
}
 
Example #4
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 6 votes vote down vote up
/**
 * Loads 100000 unique keys, then tries to delete 10000 keys that were never
 * inserted and requires the fraction of successful deletes to stay below
 * 0.02.
 */
@Test
public void sanityFalseDeleteRate() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	final int inserted = 100000;
	for (int key = 0; key < inserted; key++) {
		// a refused insert would mean the filter filled up early
		assertTrue(filter.put(key));
	}

	// a "false delete" is a successful delete of a key that was never added;
	// the original author expects its rate to roughly track the false
	// positive rate
	final int probes = 10000;
	final int firstProbe = inserted + 1;
	int wronglyDeleted = 0;
	for (int offset = 0; offset < probes; offset++) {
		if (filter.delete(firstProbe + offset)) {
			wronglyDeleted++;
		}
	}

	String failure = wronglyDeleted
			+ " false deletions detected. False delete rate is above 0.02 on filter with 0.01 false positive rate";
	assertTrue(failure, (double) wronglyDeleted / probes < 0.02);
}
 
Example #5
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 6 votes vote down vote up
/**
 * Loads 100000 unique keys and verifies that every one of them is reported
 * as possibly present, i.e. there are no false negatives.
 */
@Test
public void sanityFalseNegative() {
	final int keyCount = 100000;
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	int key = 0;
	while (key < keyCount) {
		// a refused insert would mean the filter filled up early
		assertTrue(filter.put(key));
		key++;
	}

	// count inserted keys the filter claims not to contain
	int missing = 0;
	for (int probe = 0; probe < keyCount; probe++) {
		missing += filter.mightContain(probe) ? 0 : 1;
	}
	assertTrue(missing + " false negatives detected", missing == 0);
}
 
Example #6
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 6 votes vote down vote up
/**
 * Loads 100000 unique keys and verifies that each of them can be deleted
 * again.
 */
@Test
public void sanityFailedDelete() {
	final int keyCount = 100000;
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	int key = 0;
	while (key < keyCount) {
		// a refused insert would mean the filter filled up early
		assertTrue(filter.put(key));
		key++;
	}

	// count keys that are definitely present but could not be removed
	int refused = 0;
	for (int victim = 0; victim < keyCount; victim++) {
		boolean removed = filter.delete(victim);
		if (!removed) {
			refused++;
		}
	}
	assertTrue(refused + " false deletions detected", refused == 0);
}
 
Example #7
Source File: Cuckoo.java    From XRTB with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a file or S3 object line by line and loads the filter.
 * <p>
 * The first line sizes the filter: the expected entry count is {@code sz}
 * divided by the length of the first key minus 5. Only the first field of
 * each line (as split by {@code eatquotedStrings}) is inserted, with double
 * quotes removed. No false positive rate is set on the builder.
 *
 * @param br BufferedReader. The line-by-line reader. It is always closed
 *           before this method returns, including when an exception is thrown.
 * @param sz total size of the input, used to estimate the number of entries.
 * @throws Exception on I/O errors, or IllegalArgumentException if the input
 *                   contains no lines at all.
 */
void makeFilter(BufferedReader br, long sz) throws Exception {
	try {
		String line = br.readLine();
		if (line == null) {
			// readLine() returns null at end of stream; fail with a clear message
			// instead of a NullPointerException on trim()
			throw new IllegalArgumentException("Cannot build cuckoo filter: input is empty");
		}

		// only the first field is ever used, so only that one is unquoted
		String firstKey = eatquotedStrings(line.trim())[0].replace("\"", "");

		// clamp the per-entry length to 1 so keys of 5 characters or fewer do not
		// cause a divide-by-zero or a negative filter size
		long perEntry = Math.max(1L, firstKey.length() - 5L);
		long size = sz / perEntry;
		cuckooFilter = new CuckooFilter.Builder<>(Funnels.stringFunnel(Charset.forName("UTF-8")), size).build();
		// NOTE(review): the boolean result of put() is ignored here and below, so
		// a rejected insert goes unnoticed - confirm this is intended
		cuckooFilter.put(firstKey);

		while ((line = br.readLine()) != null) {
			cuckooFilter.put(eatquotedStrings(line)[0].replace("\"", ""));
		}
	} finally {
		// release the reader even if parsing or insertion fails part-way
		br.close();
	}
}
 
Example #8
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Fills a filter until an insertion is refused, then checks that it survives
 * a serialization round trip.
 */
@Test
public void testSerializeFull() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	// insert sequential keys until the first rejected put
	int next = 0;
	while (filter.put(next)) {
		next++;
	}
	SerializableTester.reserializeAndAssert(filter);
}
 
Example #9
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Loads one million keys into a two-million-key filter, then checks that it
 * survives a serialization round trip.
 */
@Test
public void testSerializePartFull() {
	final int keyCount = 1000000;
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	int key = 0;
	while (key < keyCount) {
		assertTrue(filter.put(key));
		key++;
	}
	SerializableTester.reserializeAndAssert(filter);
}
 
Example #10
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Fills a filter until an insertion is refused and checks that copy()
 * yields an equal but distinct instance. Per the original author's note, a
 * completely full filter exercises the victim cache as well.
 */
@Test
public void testCopyFull() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	CuckooFilter<Integer> original = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	// insert sequential keys until the first rejected put
	int next = 0;
	while (original.put(next)) {
		next++;
	}

	CuckooFilter<Integer> duplicate = original.copy();
	assertTrue(duplicate.equals(original));
	assertNotSame(original, duplicate);
}
 
Example #11
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Loads one million keys into a two-million-key filter and checks that
 * copy() yields an equal but distinct instance.
 */
@Test
public void testCopyPartFull() {
	final int keyCount = 1000000;
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	CuckooFilter<Integer> original = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	int key = 0;
	while (key < keyCount) {
		assertTrue(original.put(key));
		key++;
	}

	CuckooFilter<Integer> duplicate = original.copy();
	assertTrue(duplicate.equals(original));
	assertNotSame(original, duplicate);
}
 
Example #12
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that copy() on a freshly built, empty filter yields an equal but
 * distinct instance.
 */
@Test
public void testCopyEmpty() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	CuckooFilter<Integer> original = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	CuckooFilter<Integer> duplicate = original.copy();

	assertTrue(duplicate.equals(original));
	assertNotSame(original, duplicate);
}
 
Example #13
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the fill/drain stress check three times in a row against filters of
 * 19 different capacities (20000 up to 380000 keys, in steps of 20000).
 */
@Test
public void sanityFillDeleteAllAndCheckABunchOfStuff() {
	final int rounds = 3;
	for (int filterKeys = 20000; filterKeys < 400000; filterKeys += 20000) {
		CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), filterKeys);
		CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
				.withHashAlgorithm(Algorithm.Murmur3_32).build();

		// reuse the same filter so each round starts from a drained state
		int round = 0;
		while (round < rounds) {
			stressFillDrainCheck(filter);
			round++;
		}
	}
}
 
Example #14
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Plants a victim directly into the filter's fields and checks that
 * checkIsVictim() recognises it through either of its two bucket indexes
 * when the tag matches.
 */
@Test
public void testVictimCacheTagComparison() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	// set the victim state by hand rather than by overfilling the filter
	filter.victim = new Victim(1, 2, 42);
	filter.hasVictim = true;

	BucketAndTag viaFirstIndex = new BucketAndTag(filter.victim.getI1(), 42);
	BucketAndTag viaSecondIndex = new BucketAndTag(filter.victim.getI2(), 42);

	assertTrue(filter.checkIsVictim(viaFirstIndex));
	assertTrue(filter.checkIsVictim(viaSecondIndex));
}
 
Example #15
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Inserts the same key repeatedly (at most 30 times) and expects the first
 * refused insert to be attempt number 9, counting from zero.
 */
@Test
public void sanityOverFillBucketMoreThan2B() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 100000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	final int maxTries = 30;
	// ends up as the index of the first rejected put, or maxTries if none was rejected
	int failedAt = 0;
	while (failedAt < maxTries && filter.put(2)) {
		failedAt++;
	}

	String failure = "Duplicate insert failed at " + failedAt
			+ " Expected value is (2*BUCKET_SIZE)+victim cache = 9";
	assertTrue(failure, failedAt == 9);
}
 
Example #16
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Tracks approximateCount() for a single key while nine duplicates are
 * inserted, a tenth is refused, and all nine are deleted again.
 */
@Test
public void sanityApproimateCount() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 130000);
	CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();

	// first eight duplicates: the count follows every insert
	for (int copies = 1; copies <= 8; copies++) {
		assertTrue(filter.put(42));
		assertTrue(filter.approximateCount(42) == copies);
	}

	// ninth duplicate is still accepted (the original author expects it to
	// land in the victim slot)
	assertTrue(filter.put(42));
	assertTrue(filter.approximateCount(42) == 9);

	// tenth duplicate is refused and leaves the count untouched
	assertFalse(filter.put(42));
	assertTrue(filter.approximateCount(42) == 9);

	// two deletes bring the count down to seven
	assertTrue(filter.delete(42));
	assertTrue(filter.delete(42));
	assertTrue(filter.approximateCount(42) == 7);

	// delete the remaining seven, checking the count after each one
	for (int remaining = 6; remaining >= 0; remaining--) {
		assertTrue(filter.delete(42));
		assertTrue(filter.approximateCount(42) == remaining);
	}

	// nothing left
	assertFalse(filter.mightContain(42));
}
 
Example #17
Source File: Cuckoo.java    From bidder with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a file or S3 object line by line and loads the filter.
 * <p>
 * The first line sizes the filter: the expected entry count is {@code sz}
 * divided by the length of the first key minus 5. Only the first field of
 * each line (as split by {@code eatquotedStrings}) is inserted, with double
 * quotes removed. No false positive rate is set on the builder.
 *
 * @param br BufferedReader. The line-by-line reader. It is always closed
 *           before this method returns, including when an exception is thrown.
 * @param sz total size of the input, used to estimate the number of entries.
 * @throws Exception on I/O errors, or IllegalArgumentException if the input
 *                   contains no lines at all.
 */
void makeFilter(BufferedReader br, long sz) throws Exception {
	try {
		String line = br.readLine();
		if (line == null) {
			// readLine() returns null at end of stream; fail with a clear message
			// instead of a NullPointerException on trim()
			throw new IllegalArgumentException("Cannot build cuckoo filter: input is empty");
		}

		// only the first field is ever used, so only that one is unquoted
		String firstKey = eatquotedStrings(line.trim())[0].replace("\"", "");

		// clamp the per-entry length to 1 so keys of 5 characters or fewer do not
		// cause a divide-by-zero or a negative filter size
		long perEntry = Math.max(1L, firstKey.length() - 5L);
		long size = sz / perEntry;
		cuckooFilter = new CuckooFilter.Builder<>(Funnels.stringFunnel(Charset.forName("UTF-8")), size).build();
		// NOTE(review): the boolean result of put() is ignored here and below, so
		// a rejected insert goes unnoticed - confirm this is intended
		cuckooFilter.put(firstKey);

		while ((line = br.readLine()) != null) {
			cuckooFilter.put(eatquotedStrings(line)[0].replace("\"", ""));
		}
	} finally {
		// release the reader even if parsing or insertion fails part-way
		br.close();
	}
}
 
Example #18
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a two-million-key filter with each of four hash algorithms and
 * passes as long as none of the builds throws.
 */
@Test
public void testCreateDifferentHashLengths() {
	Algorithm[] algorithms = { Algorithm.Murmur3_32, Algorithm.sipHash24, Algorithm.Murmur3_128,
			Algorithm.sha256 };
	for (Algorithm algorithm : algorithms) {
		new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000).withHashAlgorithm(algorithm).build();
	}
}
 
Example #19
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 5 votes vote down vote up
/**
 * For nine capacities (100000 up to 900000 keys) fills a filter with unique
 * keys until the first insert is refused, then checks the reported count,
 * the number of keys accepted, and the load factor at that point.
 */
@Test
public void sanityOverFillFilter() {
	for (int filterKeys = 100000; filterKeys < 1000000; filterKeys += 100000) {
		CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), filterKeys);
		CuckooFilter<Integer> filter = builder.withFalsePositiveRate(0.01)
				.withHashAlgorithm(Algorithm.Murmur3_32).build();

		// sequential unique keys; the loop stops at the first key the filter
		// refuses, so countFailedAt equals the number of accepted keys
		int countFailedAt = 0;
		while (filter.put(countFailedAt)) {
			countFailedAt++;
		}

		// the filter's own count must agree with what was actually inserted
		assertTrue("Filter reports " + filter.getCount() + " when we actually added " + countFailedAt
				+ " items before failing", filter.getCount() == countFailedAt);

		// per the original author, capacity may exceed the request because
		// numBuckets is rounded to a power of 2, but never beyond 2x + 1
		int upperBound = (filterKeys * 2) + 1;
		assertTrue("We were able to add " + countFailedAt + " keys to a filter that was only made to hold "
				+ filterKeys, countFailedAt <= upperBound);

		// load factor at first failure must fall strictly between 0.95 and 0.995;
		// the bounds are deliberately tight to surface small anomalies
		assertTrue(
				"Load Factor only " + filter.getLoadFactor() + " for filter with " + filterKeys
						+ " capacity at first insertion failure. Expected more than 0.95",
				filter.getLoadFactor() > .95);
		assertTrue(
				"Load Factor " + filter.getLoadFactor() + " for filter with " + filterKeys
						+ " capacity at first insertion failure. Expected less than .995",
				filter.getLoadFactor() < 0.995);
	}
}
 
Example #20
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with a negative false positive rate must throw
 * IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsNegFp() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withFalsePositiveRate(-0.02).build();
}
 
Example #21
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with a false positive rate of 1 must throw
 * IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsTooHighFp() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withFalsePositiveRate(1).build();
}
 
Example #22
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a freshly built, empty filter survives a serialization round
 * trip.
 */
@Test
public void testSerializeEmpty() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	CuckooFilter<Integer> emptyFilter = builder.withFalsePositiveRate(0.01)
			.withHashAlgorithm(Algorithm.Murmur3_32).build();
	SerializableTester.reserializeAndAssert(emptyFilter);
}
 
Example #23
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Runs ClassSanityTester's null checks against CuckooFilter, supplying
 * explicit defaults for int, long and double arguments.
 */
@Test
public void autoTestNulls() {
	// 15 was chosen by the original author so the int/long defaults pass
	// the class's own argument checks
	ClassSanityTester tester = new ClassSanityTester()
			.setDefault(int.class, 15)
			.setDefault(long.class, 15L)
			.setDefault(double.class, 0.001);
	tester.testNulls(CuckooFilter.class);
}
 
Example #24
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Requesting Integer.MAX_VALUE keys together with the Murmur3_32 hash must
 * throw IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsShortHashFunction() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(),
			Integer.MAX_VALUE);
	builder.withFalsePositiveRate(0.01).withHashAlgorithm(Algorithm.Murmur3_32).build();
}
 
Example #25
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with a false positive rate of zero must throw
 * IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsZeroFp() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withFalsePositiveRate(0.0).build();
}
 
Example #26
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with a negative key capacity must throw
 * IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsNegItems() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), -2000000);
	builder.build();
}
 
Example #27
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Fills the given filter with sequential keys until an insert is refused,
 * verifies every accepted key is reported present, then deletes everything
 * in two passes and verifies the filter ends up completely empty.
 *
 * @param filter the filter to stress; it is mutated and left drained
 */
private void stressFillDrainCheck(CuckooFilter<Integer> filter) {
	// keys 0..maxInsertedVal-1 are the ones the filter accepted
	int maxInsertedVal = 0;
	while (filter.put(maxInsertedVal)) {
		maxInsertedVal++;
	}

	// every accepted key must be reported as possibly present
	for (int key = 0; key < maxInsertedVal; key++) {
		String failure = "filter doesn't contain " + key + " with " + maxInsertedVal + " total insertions";
		assertTrue(failure, filter.mightContain(key));
	}

	// drain in two passes: per the original author, the first pass removes
	// most items and the second catches any that were deleted from the
	// "wrong" bucket, which should (almost) certainly empty the filter
	int deleteCount = 0;
	for (int pass = 0; pass < 2; pass++) {
		for (int key = 0; key < maxInsertedVal; key++) {
			if (filter.delete(key)) {
				deleteCount++;
			}
		}
	}

	// as many successful deletes as inserts, and the filter knows it is empty
	assertTrue(maxInsertedVal == deleteCount);
	assertTrue(filter.getCount() == 0);

	// nothing may be deletable any more
	for (int key = 0; key < maxInsertedVal; key++) {
		assertFalse(filter.delete(key));
	}
	// and an empty filter must not report any key as present
	for (int key = 0; key < maxInsertedVal; key++) {
		assertFalse(filter.mightContain(key));
	}
}
 
Example #28
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with an expected concurrency of zero must throw
 * IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsZeroConcurrency() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withExpectedConcurrency(0).build();
}
 
Example #29
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with an expected concurrency of 10 must throw
 * IllegalArgumentException (the original author's note: not a multiple of
 * 2 concurrency).
 */
@Test(expected = IllegalArgumentException.class)
public void testInvalidArgsNotMult2Concurrency() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withExpectedConcurrency(10).build();
}
 
Example #30
Source File: TestCuckooFilter.java    From CuckooFilter4J with Apache License 2.0 4 votes vote down vote up
/**
 * Building a filter with an expected concurrency of 16 should simply
 * succeed; the test passes when no exception is thrown.
 */
@Test
public void testConcurrencyWorks() {
	CuckooFilter.Builder<Integer> builder = new CuckooFilter.Builder<>(Funnels.integerFunnel(), 2000000);
	builder.withExpectedConcurrency(16).build();
}