com.google.common.hash.BloomFilter Java Examples

The following examples show how to use com.google.common.hash.BloomFilter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BloomFilterUnitTest.java    From tutorials with MIT License 6 votes vote down vote up
@Test
public void givenBloomFilter_whenAddNStringsToIt_thenShouldNotReturnAnyFalsePositive() {
    // given: a filter sized for 500 expected insertions with a 1% false-positive probability
    final int expectedInsertions = 500;
    final double falsePositiveProbability = 0.01;
    BloomFilter<Integer> filter =
            BloomFilter.create(Funnels.integerFunnel(), expectedInsertions, falsePositiveProbability);

    // when: the values 1, 2 and 3 are inserted
    for (int value = 1; value <= 3; value++) {
        filter.put(value);
    }

    // then: every inserted value is reported as possibly present
    for (int value = 1; value <= 3; value++) {
        assertThat(filter.mightContain(value)).isTrue();
    }

    // a value that was never inserted is expected to be reported absent; the filter was
    // configured so that a wrong "true" answer has a probability of about 1%
    assertThat(filter.mightContain(100)).isFalse();
}
 
Example #2
Source File: BlacklistPasswordPolicyProviderFactory.java    From keycloak with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link BloomFilter} from the lines of the blacklist found at {@code path}.
 *
 * <p>The filter is sized from {@code getPasswordCount()} and {@code FALSE_POSITIVE_PROBABILITY};
 * every line produced by the reader is inserted as one entry. Start and end of the load are
 * logged at info level.
 *
 * @return the {@link BloomFilter} backing a password blacklist
 * @throws RuntimeException if an {@link IOException} is raised while loading; the original
 *         exception is kept as the cause
 */
private BloomFilter<String> load() {
    try {
        LOG.infof("Loading blacklist with name %s from %s - start", name, path);

        long expectedEntries = getPasswordCount();
        BloomFilter<String> blacklist = BloomFilter.create(
                Funnels.stringFunnel(StandardCharsets.UTF_8), expectedEntries, FALSE_POSITIVE_PROBABILITY);

        // The reader is closed as soon as all lines have been added to the filter.
        try (BufferedReader reader = newReader(path)) {
            reader.lines().forEach(blacklist::put);
        }

        LOG.infof("Loading blacklist with name %s from %s - end", name, path);
        return blacklist;
    } catch (IOException e) {
        throw new RuntimeException("Could not load password blacklist from path: " + path, e);
    }
}
 
Example #3
Source File: TestBloom.java    From XRTB with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that every line of the cookie file is reported as possibly present by the
 * Bloom filter looked up under "$test".
 *
 * @throws Exception if the data file cannot be read or the filter cannot be set up.
 */
@Test
public void testBloom() throws Exception {
    // NOTE(review): constructing Bloom presumably registers the filter in LookingGlass
    // under "$test" — confirm against the Bloom class.
    new Bloom("$test", "data/c1x_cookies.csv");
    BloomFilter b = (BloomFilter) LookingGlass.get("$test");
    assertNotNull(b);

    int nP = 0; // lines the filter reports as possibly present
    int k = 0;  // total number of lines read
    // try-with-resources releases the file handle even if reading or an assertion fails
    try (BufferedReader br = new BufferedReader(new FileReader("data/c1x_cookies.csv"))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (b.mightContain(line)) {
                nP++;
            }
            k++;
        }
    }
    // every line read from the file must have been reported as possibly present
    assertTrue(k == nP);
}
 
Example #4
Source File: ScalableBloomFilter.java    From nexus-public with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Computes the combined false-positive probability over all underlying filters.
 *
 * <p>The per-filter false-positive events are not mutually exclusive. Treating them as
 * independent, the probability that at least one filter yields a false positive is the
 * complement of none of them doing so: {@code 1 - (1 - p1) * (1 - p2) * ... * (1 - pn)}.
 * This closed form equals the full inclusion-exclusion expansion for any number of filters;
 * a hand-expanded sum of singles, pairs and the overall product only matches it for exactly
 * three filters. With no filters the result is {@code 0.0}.
 *
 * @return the probability of encountering a false positive.
 */
public double expectedFpp() {
  // Probability that no filter reports a false positive.
  double noFalsePositive = filters.stream()
      .mapToDouble(BloomFilter::expectedFpp)
      .map(fpp -> 1.0 - fpp)
      .reduce(1.0, (a, b) -> a * b);

  return 1.0 - noFalsePositive;
}
 
Example #5
Source File: ProbableIntersectionCursorState.java    From fdb-record-layer with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a cursor state from a continuation.
 *
 * <p>If the continuation carries no Bloom filter bytes, a new filter is created from
 * {@code expectedInsertions} and {@code falsePositiveRate}; otherwise the filter is read back
 * from those bytes. If the continuation reports the child as ended, the state wraps an empty
 * cursor with an empty set; otherwise the cursor is obtained from {@code cursorFunction} using
 * the child continuation's bytes, with a fresh {@link HashSet}.
 *
 * @param cursorFunction produces the child cursor from continuation bytes
 * @param continuation the continuation to resume from
 * @param comparisonKeyFunction passed through to the created state
 * @param expectedInsertions expected insertions used when a new Bloom filter is created
 * @param falsePositiveRate false-positive rate used when a new Bloom filter is created
 * @return the new cursor state
 * @throws RecordCoreException if the stored Bloom filter cannot be deserialized
 */
@Nonnull
static <T> ProbableIntersectionCursorState<T> from(
        @Nonnull Function<byte[], RecordCursor<T>> cursorFunction,
        @Nonnull BloomFilterCursorContinuation continuation,
        @Nonnull Function<? super T, ? extends List<Object>> comparisonKeyFunction,
        long expectedInsertions, double falsePositiveRate) {
    BloomFilter<List<Object>> filter;
    if (continuation.getBloomBytes() != null) {
        // Resume with the filter that was serialized into the continuation.
        try {
            filter = BloomFilter.readFrom(continuation.getBloomBytes().newInput(), KeyFunnel.VERSION_0);
        } catch (IOException e) {
            throw new RecordCoreException("unable to deserialize bloom filter", e);
        }
    } else {
        filter = BloomFilter.create(KeyFunnel.VERSION_0, expectedInsertions, falsePositiveRate);
    }

    if (!continuation.isChildEnd()) {
        RecordCursor<T> childCursor = cursorFunction.apply(continuation.getChild().toBytes());
        // The last argument is true only when no filter bytes were stored in the continuation.
        return new ProbableIntersectionCursorState<>(childCursor, continuation, comparisonKeyFunction, filter, new HashSet<>(), continuation.getBloomBytes() == null);
    }
    return new ProbableIntersectionCursorState<>(RecordCursor.empty(), continuation, comparisonKeyFunction, filter, Collections.emptySet(), false);
}
 
Example #6
Source File: BloomFilterUtil.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a wrapped BloomFilter from the values of a multi-map of fields.
 * <p>
 * For a non-null map the filter is created with the map's size and updated with the indexed field value of every
 * entry. For a null map a filter is created with a size of zero and nothing is applied. The number of applied
 * values is recorded on the returned wrapper.
 *
 * @param fields
 *            The fields and their values with which to create a bloom filter
 * @return a wrapped BloomFilter based on a multi-map of fields
 */
public BloomFilterWrapper newMultimapBasedFilter(final Multimap<String,NormalizedContentInterface> fields) {
    // Number of field values pushed into the filter
    int appliedCount = 0;
    
    final BloomFilter<String> bloom;
    if (null == fields) {
        // No fields: create a filter sized with the (zero) applied count
        bloom = MemberShipTest.create(appliedCount);
    } else {
        // Size the filter by the number of entries, then apply each entry's indexed value
        bloom = MemberShipTest.create(fields.size());
        for (final Entry<String,NormalizedContentInterface> entry : fields.entries()) {
            MemberShipTest.update(bloom, entry.getValue().getIndexedFieldValue());
            appliedCount++;
        }
    }
    
    final BloomFilterWrapper result = new BloomFilterWrapper(bloom);
    result.setFieldValuesAppliedToFilter(appliedCount);
    return result;
}
 
Example #7
Source File: TestBloom.java    From bidder with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that every line of the cookie file is reported as possibly present by the
 * Bloom filter looked up under "$test".
 *
 * @throws Exception if the data file cannot be read or the filter cannot be set up.
 */
@Test
public void testBloom() throws Exception {
    // NOTE(review): constructing Bloom presumably registers the filter in LookingGlass
    // under "$test" — confirm against the Bloom class.
    new Bloom("$test", "data/c1x_cookies.csv");
    BloomFilter b = (BloomFilter) LookingGlass.get("$test");
    assertNotNull(b);

    int nP = 0; // lines the filter reports as possibly present
    int k = 0;  // total number of lines read
    // try-with-resources releases the file handle even if reading or an assertion fails
    try (BufferedReader br = new BufferedReader(new FileReader("data/c1x_cookies.csv"))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (b.mightContain(line)) {
                nP++;
            }
            k++;
        }
    }
    // every line read from the file must have been reported as possibly present
    assertTrue(k == nP);
}
 
Example #8
Source File: XpTrackerService.java    From runelite with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Creates a new Bloom filter and inserts every entry currently held in
 * {@code usernameUpdateQueue}. The queue is iterated while holding its monitor.
 *
 * @return the populated filter
 */
private BloomFilter<String> createFilter()
{
	// NOTE(review): the funnel uses the platform default charset — confirm this is intended.
	final BloomFilter<String> usernames = BloomFilter.create(
		Funnels.stringFunnel(Charset.defaultCharset()), BLOOMFILTER_EXPECTED_INSERTIONS);

	synchronized (usernameUpdateQueue)
	{
		for (String queuedName : usernameUpdateQueue)
		{
			usernames.put(queuedName);
		}
	}

	return usernames;
}
 
Example #9
Source File: PremiumList.java    From nomulus with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a new PremiumListRevision for the given parent list whose Bloom filter contains all
 * of the given premium labels.
 *
 * <p>The filter is serialized once to check that its size does not exceed
 * {@code MAX_BLOOM_FILTER_BYTES}.
 */
@VisibleForTesting
public static PremiumListRevision create(PremiumList parent, Set<String> premiumLabels) {
  PremiumListRevision revision = new PremiumListRevision();
  revision.parent = Key.create(parent);
  revision.revisionId = allocateId();
  // The labels are already punycoded, so they are funneled without any further character
  // encoding.
  revision.probablePremiumLabels =
      BloomFilter.create(unencodedCharsFunnel(), premiumLabels.size());
  for (String label : premiumLabels) {
    revision.probablePremiumLabels.put(label);
  }
  try {
    // Serialize into memory only to measure the resulting size.
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    revision.probablePremiumLabels.writeTo(serialized);
    checkArgument(
        serialized.size() <= MAX_BLOOM_FILTER_BYTES,
        "Too many premium labels were specified; Bloom filter exceeds max entity size");
  } catch (IOException e) {
    throw new IllegalStateException("Could not serialize premium labels Bloom filter", e);
  }
  return revision;
}
 
Example #10
Source File: ProbableIntersectionCursorState.java    From fdb-record-layer with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a cursor state. The cursor, the continuation's child and the comparison key function
 * are handed to the superclass; the remaining arguments are stored on this instance.
 *
 * @param cursor the cursor this state wraps
 * @param continuation continuation whose child is passed to the superclass
 * @param comparisonKeyFunction function passed to the superclass
 * @param bloomFilter Bloom filter stored on this state
 * @param seenSet set stored on this state
 * @param firstIteration flag stored on this state
 */
private ProbableIntersectionCursorState(@Nonnull RecordCursor<T> cursor,
                                @Nonnull BloomFilterCursorContinuation continuation,
                                @Nonnull Function<? super T, ? extends List<Object>> comparisonKeyFunction,
                                @Nonnull BloomFilter<List<Object>> bloomFilter,
                                @Nonnull Set<List<Object>> seenSet,
                                boolean firstIteration) {
    super(cursor, continuation.getChild(), comparisonKeyFunction);
    this.firstIteration = firstIteration;
    this.seenSet = seenSet;
    this.bloomFilter = bloomFilter;
}
 
Example #11
Source File: AbstractNGramTokenizationStrategy.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Constructor
 * 
 * @param filter
 *            Updated with n-grams tokenized from normalized content
 */
public AbstractNGramTokenizationStrategy(final BloomFilter<String> filter) {
    if (null == filter) {
        this.log.warn("Cannot create n-grams for bloom filter", new IllegalArgumentException("BloomFilter is null"));
    }
    this.setFilter(filter);
}
 
Example #12
Source File: TestBloomFiltersSpeed.java    From count-db with MIT License 5 votes vote down vote up
/**
 * Measures how long it takes to query the given filter for every third value in
 * {@code [0, NUM_OF_VALUES)}.
 *
 * @param bloomFilter2 the filter to query
 * @return the elapsed wall-clock time in milliseconds
 */
private static long readValues(BloomFilter<Long> bloomFilter2) {
    final long startedAt = System.currentTimeMillis();
    int candidate = 0;
    while (candidate < NUM_OF_VALUES) {
        // Only multiples of three are looked up; the lookup result is ignored.
        if (candidate % 3 == 0) {
            bloomFilter2.mightContain((long) candidate);
        }
        candidate++;
    }
    final long finishedAt = System.currentTimeMillis();
    return finishedAt - startedAt;
}
 
Example #13
Source File: TestBloomFiltersSpeed.java    From count-db with MIT License 5 votes vote down vote up
/**
 * Creates three Bloom filter implementations with the same capacity and a false-positive rate
 * of 0.001, then logs the values returned by {@code putValues} and {@code readValues} for each.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final double falsePositiveRate = 0.001;

    LongBloomFilter longFilter = new LongBloomFilter(NUM_OF_VALUES, falsePositiveRate);

    // Guava filter with a funnel that writes the long value directly into the sink.
    BloomFilter<Long> guavaFilter =
            BloomFilter.create((from, into) -> into.putLong(from), NUM_OF_VALUES, falsePositiveRate);

    LongCountsBloomFilter countsFilter = new LongCountsBloomFilter(NUM_OF_VALUES, falsePositiveRate);

    // Write phase
    Log.i("Writing values for filter 1 took " + putValues(longFilter));
    Log.i("Writing values for filter 2 took " + putValues(guavaFilter));
    Log.i("Writing values for filter 3 took " + putValues(countsFilter));

    // Read phase
    Log.i("Reading values for filter 1 took " + readValues(longFilter));
    Log.i("Reading values for filter 2 took " + readValues(guavaFilter));
    Log.i("Reading values for filter 3 took " + readValues(countsFilter));
}
 
Example #14
Source File: SpillMap.java    From phoenix with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Initializes a page: stores the given identifiers and limits, clears the page map, creates a
 * new Bloom filter for {@code pageInserts} expected insertions and resets the page bookkeeping.
 *
 * @param id stored as the page index
 * @param thresholdBytes size threshold of a page
 * @param pageInserts expected number of insertions used to create the Bloom filter
 * @param spillFile the spill file stored on this page
 */
public MappedByteBufferMap(int id, int thresholdBytes, int pageInserts, SpillFile spillFile) {
    // Values taken directly from the arguments
    this.pageIndex = id;
    this.thresholdBytes = thresholdBytes;
    this.spillFile = spillFile;

    // Start with an empty page map and a new filter
    pageMap.clear();
    bFilter = BloomFilter.create(Funnels.byteArrayFunnel(), pageInserts);

    // Initial bookkeeping state
    totalResultSize = 0;
    localDepth = 1;
    pagedIn = true;
    dirtyPage = true;
}
 
Example #15
Source File: NGramTokenizationStrategy.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a tokenized n-gram to the BloomFilter based on the specified normalized content.
 * <p>
 * The superclass gets the first chance to apply the n-gram. Only if it reports no update is the n-gram added to
 * this strategy's own filter, provided both the n-gram and the filter are non-null.
 * 
 * @param ngram
 *            An n-gram generated from the specified normalized content
 * @param content
 *            A normalized field name and value
 * @return true, if the n-gram was applied to the strategy's BloomFilter
 */
protected boolean updateFilter(final String ngram, final NormalizedContentInterface content) throws TokenizationException {
    // Already handled by the superclass
    if (super.updateFilter(ngram, content)) {
        return true;
    }
    
    // Nothing can be applied without both an n-gram and a filter
    final BloomFilter<String> target = this.getFilter();
    if ((null == ngram) || (null == target)) {
        return false;
    }
    
    MemberShipTest.update(target, ngram);
    return true;
}
 
Example #16
Source File: UniqueTransform.java    From datawave with Apache License 2.0 5 votes vote down vote up
public UniqueTransform(Set<String> fields) {
    this.fields = deconstruct(fields);
    this.bloom = BloomFilter.create(new ByteFunnel(), 500000, 1e-15);
    if (DEBUG) {
        this.seen = new HashSet<>();
    }
    if (log.isTraceEnabled())
        log.trace("unique fields: " + this.fields);
}
 
Example #17
Source File: ListsTest.java    From spring-boot-cookbook with Apache License 2.0 5 votes vote down vote up
/**
     * https://mp.weixin.qq.com/s/VGtH_DXI6paogOekrodixA
     */
    @Test
    public void testBloomFilterWithGuava() {
        // Times the filling of a Guava Bloom filter and three membership checks.
        final StopWatch stopWatch = new StopWatch("guavaBloomFilterDemo");

        // Task "init": create the filter and insert every value in [0, num).
        stopWatch.start("init");
        final int num = 10000000;
        final BloomFilter<Integer> bloomFilter = BloomFilter.create(Funnels.integerFunnel(), num, 0.01);
        int next = 0;
        while (next < num) {
            bloomFilter.put(next);
            next++;
        }
        stopWatch.stop();

        // One timed task per probe; each probed value was inserted above, so it must be found.
        for (int probe : new int[] {1, 9999, 1234567}) {
            stopWatch.start("check-" + probe);
            assertThat(bloomFilter.mightContain(probe)).isTrue();
            stopWatch.stop();
        }

        System.out.println(stopWatch.prettyPrint());

        // Sample output of one run:
//        -----------------------------------------
//        ms     %     Task name
//        -----------------------------------------
//        04443  099%  init
//        00037  001%  check-1
//        00000  000%  check-9999
//        00000  000%  check-1234567

    }
 
Example #18
Source File: BloomFilters.java    From quarantyne with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a Bloom filter of UTF-8 strings from the serialized form held by an asset.
 *
 * @param asset the asset whose bytes contain the serialized filter
 * @return a {@link BloomFilter}
 * @throws AssetException wrapping the {@link IOException} if the filter cannot be read
 */
public static BloomFilter<String> deserialize(Asset asset) throws AssetException {
  BloomFilter<String> restored;
  try {
    restored = BloomFilter.readFrom(asset.getBytes(), Funnels.stringFunnel(Charsets.UTF_8));
  } catch (IOException ioex) {
    throw new AssetException(ioex);
  }
  return restored;
}
 
Example #19
Source File: CompromisedPasswordClassifierTest.java    From quarantyne with Apache License 2.0 5 votes vote down vote up
@Test
public void testClassifier() {
  // Filter holding the three passwords treated as compromised in this test.
  BloomFilter<String> compromisedPasswords =
      BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 3);
  compromisedPasswords.put("alpha");
  compromisedPasswords.put("bravo");
  compromisedPasswords.put("charlie");

  Supplier<Config> configSupplier =
      () -> Config.builder()
          .loginAction(new QIdentityAction("/login", "email", "password"))
          .registerAction(new QIdentityAction("/register", "email", "password"))
          .build();
  CompromisedPasswordClassifier classifier =
      new CompromisedPasswordClassifier(compromisedPasswords, configSupplier);
  HttpRequest plainRequest = TestHttpRequest.REQ();

  // null body and empty body yield no label
  assertThat(classifier.classify(plainRequest, null))
      .isEqualTo(Label.NONE);
  assertThat(classifier.classify(plainRequest, TestHttpRequestBody.EMPTY))
      .isEqualTo(Label.NONE);

  // the body has no "password" key
  assertThat(
      classifier.classify(
          plainRequest, TestHttpRequestBody.make(new JsonObject().put("name", "john"))))
      .isEqualTo(Label.NONE);

  // the body has a "password" key, but its value was not put into the Bloom filter
  assertThat(
      classifier.classify(
          plainRequest, TestHttpRequestBody.make(new JsonObject().put("password", "delta"))))
      .isEqualTo(Label.NONE);

  // request on the "/login" path carrying a password that is in the filter
  HttpRequest loginRequest = new TestHttpRequest.Builder().setPath("/login").build();
  assertThat(
      classifier.classify(
          loginRequest, TestHttpRequestBody.make(new JsonObject().put("password", "bravo"))))
      .isEqualTo(Label.COMPROMISED_PASSWORD);
}
 
Example #20
Source File: DisposableEmailClassifierTest.java    From quarantyne with Apache License 2.0 5 votes vote down vote up
@Test
public void testClassifier() {
  // Filter holding the two domains treated as disposable in this test.
  BloomFilter<String> disposableDomains =
      BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), 2);
  disposableDomains.put("disposable.com");
  disposableDomains.put("junk.com");

  Supplier<Config> configSupplier =
      () -> Config.builder()
          .emailParamKeys(Sets.newHashSet("email"))
          .registerAction(new QIdentityAction("/register", "email", "password"))
          .build();
  DisposableEmailClassifier classifier =
      new DisposableEmailClassifier(disposableDomains, configSupplier);
  HttpRequest plainRequest = TestHttpRequest.REQ();

  // null body and empty body yield no label
  assertThat(classifier.classify(plainRequest, null))
      .isEqualTo(Label.NONE);
  assertThat(classifier.classify(plainRequest, TestHttpRequestBody.EMPTY))
      .isEqualTo(Label.NONE);

  // the body has no "email" key
  assertThat(
      classifier.classify(
          plainRequest, TestHttpRequestBody.make(new JsonObject().put("name", "john"))))
      .isEqualTo(Label.NONE);

  // the body has an "email" key, sent with the default request; no label is expected
  // NOTE(review): both e-mail literals below look like an e-mail-obfuscation placeholder
  // rather than real test addresses — confirm the intended values against the upstream test.
  assertThat(
      classifier.classify(
          plainRequest, TestHttpRequestBody.make(new JsonObject().put("email", "[email protected]"))))
      .isEqualTo(Label.NONE);

  // request on the "/register" path; the disposable-email label is expected
  HttpRequest registerRequest = new TestHttpRequest.Builder().setPath("/register").build();
  assertThat(
      classifier.classify(
          registerRequest, TestHttpRequestBody.make(new JsonObject().put("email", "[email protected]"))))
      .isEqualTo(Label.DISPOSABLE_EMAIL);
}
 
Example #21
Source File: BloomFilters.java    From quarantyne with Apache License 2.0 5 votes vote down vote up
/**
 * Make a BF from its serialized form, loaded from a named resource.
 *
 * <p>The resource is read fully into memory first, so the filter is deserialized from an
 * in-memory stream; no additional buffering layer is needed, and the stream is closed once
 * the filter has been read.
 *
 * @param resourceName name of the resource holding the serialized filter
 * @return a {@link BloomFilter}
 * @throws IOException if the resource cannot be read or the filter cannot be deserialized
 */
public static BloomFilter<String> deserialize(String resourceName) throws IOException{
  byte[] serialized = Resources.toByteArray(Resources.getResource(resourceName));
  try (InputStream is = new ByteArrayInputStream(serialized)) {
    return BloomFilter.readFrom(is, FUNNEL);
  }
}
 
Example #22
Source File: BloomFilterConverter.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes the Bloom filter into a byte array for storage in a database column.
 *
 * @param entity the filter to serialize, may be null
 * @return the serialized bytes, or null if {@code entity} is null
 * @throws UncheckedIOException if writing the filter fails
 */
@Override
@Nullable
public byte[] convertToDatabaseColumn(@Nullable BloomFilter<String> entity) {
  if (entity == null) {
    return null;
  }
  try {
    // Write into an in-memory buffer and hand back its contents.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    entity.writeTo(buffer);
    return buffer.toByteArray();
  } catch (IOException e) {
    throw new UncheckedIOException("Error saving Bloom filter data", e);
  }
}
 
Example #23
Source File: RecordSet.java    From db with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Rebuilds the bloom filter from the ids of the current records. Should be called intermittently.
 *
 * <p>Does nothing unless {@code bloomChanged} is set. Otherwise the filter is replaced by a new
 * one and the id of every record in {@code recordSet} is added to it.
 */
public void reBloom() {
    if (!bloomChanged) {
        return;
    }
    // NOTE(review): bloomChanged is not cleared in this method — confirm it is reset elsewhere.
    bloomFilter = BloomFilter.create(Funnels.integerFunnel(), Performance.HASH_BUCKET, 0.01);
    recordSet.forEach(record -> bloomFilter.put(record.getId()));
}
 
Example #24
Source File: ScalableBloomFilter.java    From nexus-public with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Checks whether any of the underlying filters reports that the element might have been added.
 *
 * @param input - the element to check.
 * @return true as soon as one filter reports a possible match; false if none does.
 */
public boolean mightContain(final T input) {
  return filters.stream().anyMatch(candidate -> candidate.mightContain(input));
}
 
Example #25
Source File: PremiumListTest.java    From nomulus with Apache License 2.0 5 votes vote down vote up
@Test
public void bloomFilter_worksCorrectly() {
  BloomFilter<String> premiumLabelFilter =
      PremiumList.create("testname", CurrencyUnit.USD, TEST_PRICES).getBloomFilter();

  // These labels are expected to be reported as possibly present.
  for (String expectedLabel : ImmutableSet.of("silver", "gold", "palladium")) {
    assertThat(premiumLabelFilter.mightContain(expectedLabel)).isTrue();
  }

  // These labels are expected to be reported as absent.
  for (String unexpectedLabel : ImmutableSet.of("dirt", "pyrite", "zirconia")) {
    assertThat(premiumLabelFilter.mightContain(unexpectedLabel)).isFalse();
  }
}
 
Example #26
Source File: Schema.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a schema by storing every argument in the field of the same name. The arguments are
 * stored as given; no copies are made.
 */
public Schema(int size, BitSet attributes, FunctionalDependency primaryKey, List<FunctionalDependency> fdKeys, List<FunctionalDependency> fds, List<Schema> referencedSchemata, int[] minValueLengths, int[] maxValueLengths, int[] nullValueCounts, List<BloomFilter<CharSequence>> bloomFilters) {
	// Structure of the schema
	this.size = size;
	this.attributes = attributes;
	this.referencedSchemata = referencedSchemata;

	// Keys and functional dependencies
	this.primaryKey = primaryKey;
	this.fdKeys = fdKeys;
	this.fds = fds;

	// Value statistics and Bloom filters
	this.minValueLengths = minValueLengths;
	this.maxValueLengths = maxValueLengths;
	this.nullValueCounts = nullValueCounts;
	this.bloomFilters = bloomFilters;
}
 
Example #27
Source File: BloomFilterConverter.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a Bloom filter of US-ASCII strings from the bytes stored in a database column.
 *
 * @param columnValue the serialized filter, may be null
 * @return the deserialized filter, or null if {@code columnValue} is null
 * @throws UncheckedIOException if reading the filter fails
 */
@Override
@Nullable
public BloomFilter<String> convertToEntityAttribute(@Nullable byte[] columnValue) {
  if (columnValue == null) {
    return null;
  }
  ByteArrayInputStream serialized = new ByteArrayInputStream(columnValue);
  try {
    return BloomFilter.readFrom(serialized, stringFunnel(US_ASCII));
  } catch (IOException e) {
    throw new UncheckedIOException("Error loading Bloom filter data", e);
  }
}
 
Example #28
Source File: PeerTable.java    From besu with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new Bloom filter containing the ids of all peers, resets the eviction counter to
 * zero and installs the new filter as {@code idBloom}.
 */
private void buildBloomFilter() {
  // Funnel that feeds the id's byte array into the sink.
  final BloomFilter<Bytes> rebuilt =
      BloomFilter.create(
          (peerId, sink) -> sink.putBytes(peerId.toArray()),
          maxEntriesCnt,
          0.001);
  streamAllPeers()
      .map(Peer::getId)
      .forEach(rebuilt::put);
  this.evictionCnt = 0;
  this.idBloom = rebuilt;
}
 
Example #29
Source File: BloomFilterConverterTest.java    From nomulus with Apache License 2.0 5 votes vote down vote up
@Test
public void roundTripConversion_returnsSameBloomFilter() {
  // A filter holding three strings, wrapped in a test entity.
  BloomFilter<String> original = BloomFilter.create(stringFunnel(US_ASCII), 3);
  for (String element : ImmutableSet.of("foo", "bar", "baz")) {
    original.put(element);
  }
  TestEntity toPersist = new TestEntity(original);

  // Persist the entity in one transaction and look it up under "id" in another.
  jpaTm().transact(() -> jpaTm().getEntityManager().persist(toPersist));
  TestEntity reloaded =
      jpaTm().transact(() -> jpaTm().getEntityManager().find(TestEntity.class, "id"));

  assertThat(reloaded.bloomFilter).isEqualTo(original);
}
 
Example #30
Source File: PremiumList.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a premium list and a Bloom filter containing every label that is a key of
 * {@code labelsToPrices}. The filter is sized by the number of entries in that map.
 */
private PremiumList(String name, CurrencyUnit currency, Map<String, BigDecimal> labelsToPrices) {
  this.name = name;
  this.currency = currency;
  this.labelsToPrices = labelsToPrices;
  // Premium list domain labels are stored punycoded, which is why ASCII is the funnel charset.
  this.bloomFilter = BloomFilter.create(stringFunnel(US_ASCII), labelsToPrices.size());
  for (String label : labelsToPrices.keySet()) {
    this.bloomFilter.put(label);
  }
}