org.apache.hadoop.util.bloom.Filter Java Examples

The following examples show how to use org.apache.hadoop.util.bloom.Filter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BloomContainsUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether every non-null key in {@code keys} passes the membership test of the
 * bloom filter obtained from {@code bloomStr}.
 *
 * @param bloomStr serialized bloom filter; {@code null} makes the result {@code null}
 * @param keys keys to probe; {@code null} makes the result {@code FALSE}, and
 *        {@code null} elements are skipped
 * @return {@code null} if {@code bloomStr} is null; {@code FALSE} if {@code keys} is null
 *         or some non-null key fails the membership test; {@code TRUE} otherwise
 *         (including when there is no non-null key at all)
 * @throws HiveException if {@code getFilter} fails
 */
@Nullable
public Boolean evaluate(@Nullable Text bloomStr, @Nullable List<Text> keys)
        throws HiveException {
    if (bloomStr == null) {
        return null;
    }
    if (keys == null) {
        return Boolean.FALSE;
    }

    final Filter filter = getFilter(bloomStr);
    for (final Text candidate : keys) {
        if (candidate != null) {
            // The shared probe key is refilled for each candidate.
            key.set(candidate.copyBytes(), 1.0d);
            if (!filter.membershipTest(key)) {
                // One miss is enough to answer "not all contained".
                return Boolean.FALSE;
            }
        }
    }
    return Boolean.TRUE;
}
 
Example #2
Source File: BloomContainsUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the filter for {@code bloomStr}, reusing the previously deserialized filter
 * when {@code bloomStr} equals the text seen on the previous call.
 *
 * @param bloomStr serialized bloom filter
 * @return the cached filter or a freshly deserialized {@code DynamicBloomFilter}
 * @throws HiveException wrapping any {@link IOException} raised while deserializing
 */
@Nonnull
private Filter getFilter(@Nonnull final Text bloomStr) throws HiveException {
    // Cache hit: same serialized text as last time, so skip deserialization.
    if (prevBf != null && prevBfStr.equals(bloomStr)) {
        return prevBf;
    }

    final Filter decoded;
    try {
        decoded = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
    } catch (IOException e) {
        throw new HiveException(e);
    }

    // Remember a copy of the text (not the caller's instance) together with its filter.
    this.prevBfStr = new Text(bloomStr);
    this.prevBf = decoded;
    return decoded;
}
 
Example #3
Source File: BloomContainsAnyUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether at least one non-null key in {@code keys} passes the membership test of
 * the bloom filter obtained from {@code bloomStr}.
 *
 * @param bloomStr serialized bloom filter; {@code null} makes the result {@code null}
 * @param keys keys to probe; {@code null} makes the result {@code FALSE}, and
 *        {@code null} elements are skipped
 * @return {@code null} if {@code bloomStr} is null; {@code TRUE} as soon as one non-null
 *         key passes the membership test; {@code FALSE} otherwise
 * @throws HiveException if {@code getFilter} fails
 */
@Nullable
public Boolean evaluate(@Nullable Text bloomStr, @Nullable List<Text> keys)
        throws HiveException {
    if (bloomStr == null) {
        return null;
    }
    if (keys == null) {
        return Boolean.FALSE;
    }

    final Filter filter = getFilter(bloomStr);
    for (final Text candidate : keys) {
        if (candidate != null) {
            // The shared probe key is refilled for each candidate.
            key.set(candidate.copyBytes(), 1.0d);
            if (filter.membershipTest(key)) {
                // One hit is enough to answer "contains any".
                return Boolean.TRUE;
            }
        }
    }
    return Boolean.FALSE;
}
 
Example #4
Source File: BloomContainsAnyUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the filter that corresponds to {@code bloomStr}. The filter deserialized on
 * the previous call is returned again when the serialized text is equal; otherwise the
 * text is deserialized and the result replaces the remembered pair.
 *
 * @param bloomStr serialized bloom filter
 * @return the remembered filter or a newly deserialized {@code DynamicBloomFilter}
 * @throws HiveException wrapping any {@link IOException} raised while deserializing
 */
@Nonnull
private Filter getFilter(@Nonnull final Text bloomStr) throws HiveException {
    final boolean cached = prevBf != null && prevBfStr.equals(bloomStr);
    if (cached) {
        return prevBf;
    }

    final Filter fresh;
    try {
        fresh = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
    } catch (IOException e) {
        throw new HiveException(e);
    }

    // Store a copy of the text alongside the filter it produced.
    this.prevBfStr = new Text(bloomStr);
    this.prevBf = fresh;
    return fresh;
}
 
Example #5
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code filter} through a {@code Base91OutputStream} into an in-memory buffer and
 * returns the buffer contents.
 *
 * @param filter the filter to serialize
 * @return the bytes accumulated in the buffer after the Base91 stream is finished
 * @throws IOException if writing, flushing or finishing the stream fails
 */
@Nonnull
public static byte[] serialize(@Nonnull final Filter filter) throws IOException {
    final FastByteArrayOutputStream buffer = new FastByteArrayOutputStream();
    final Base91OutputStream encoder = new Base91OutputStream(buffer);
    final DataOutputStream sink = new DataOutputStream(encoder);

    filter.write(sink);
    // Flush the data stream first, then let the encoder emit whatever it still holds.
    sink.flush();
    encoder.finish();

    return buffer.toByteArray();
}
 
Example #6
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code filter} through a {@code Base91OutputStream} into an in-memory buffer and
 * stores the result in {@code dst}.
 *
 * @param filter the filter to serialize
 * @param dst the {@code Text} that receives the encoded bytes
 * @return {@code dst}, after being set from the buffer's first {@code size()} bytes
 * @throws IOException if writing, flushing or finishing the stream fails
 */
@Nonnull
public static Text serialize(@Nonnull final Filter filter, @Nonnull final Text dst)
        throws IOException {
    final FastByteArrayOutputStream buffer = new FastByteArrayOutputStream();
    final Base91OutputStream encoder = new Base91OutputStream(buffer);
    final DataOutputStream sink = new DataOutputStream(encoder);

    filter.write(sink);
    // Flush the data stream first, then let the encoder emit whatever it still holds.
    sink.flush();
    encoder.finish();

    // Only the range [0, size) of the internal array is handed to dst.
    final int written = buffer.size();
    dst.set(buffer.getInternalArray(), 0, written);
    return dst;
}
 
Example #7
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Populates {@code dst} by reading its fields from a slice of {@code buf} through a
 * {@code Base91InputStream}.
 *
 * @param buf array holding the serialized filter
 * @param offset start of the slice within {@code buf}
 * @param len length of the slice
 * @param dst the filter instance to fill in
 * @param <F> concrete filter type
 * @return {@code dst}, after {@code readFields} has been invoked on it
 * @throws IOException if reading the fields fails
 */
@Nonnull
public static <F extends Filter> F deserialize(@Nonnull final byte[] buf,
        @Nonnegative final int offset, @Nonnegative final int len, @Nonnull final F dst)
        throws IOException {
    final DataInput source = new DataInputStream(
        new Base91InputStream(new FastByteArrayInputStream(buf, offset, len)));
    dst.readFields(source);
    return dst;
}
 
Example #8
Source File: BloomContainsUDF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether {@code keyStr} passes the membership test of the bloom filter obtained
 * from {@code bloomStr}.
 *
 * @param bloomStr serialized bloom filter; {@code null} makes the result {@code null}
 * @param keyStr key to probe; {@code null} makes the result {@code FALSE}
 * @return {@code null} if {@code bloomStr} is null, {@code FALSE} if {@code keyStr} is
 *         null, otherwise the boxed outcome of the membership test
 * @throws HiveException if {@code getFilter} fails
 */
@Nullable
public Boolean evaluate(@Nullable Text bloomStr, @Nullable Text keyStr) throws HiveException {
    if (bloomStr == null) {
        return null;
    }
    if (keyStr == null) {
        return Boolean.FALSE;
    }

    final Filter filter = getFilter(bloomStr);
    // The shared probe key is refilled with the bytes of keyStr before testing.
    key.set(keyStr.copyBytes(), 1.0d);
    final boolean contained = filter.membershipTest(key);
    return Boolean.valueOf(contained);
}
 
Example #9
Source File: BloomOrUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code expected} and {@code actual} give the same membership answer for
 * {@code size} keys generated from a {@link Random} seeded with {@code seed}.
 *
 * @param expected reference filter
 * @param actual filter under test
 * @param seed seed of the random generator that produces the probe keys
 * @param size number of probe keys to check
 */
private static void assertEquals(@Nonnull Filter expected, @Nonnull Filter actual, long seed,
        int size) {
    final Key probe = new Key();
    final Random generator = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        // Each probe key is the hex-string form of the next Gaussian sample.
        final String hex = Double.toHexString(generator.nextGaussian());
        probe.set(hex.getBytes(), 1.0);

        final boolean want = expected.membershipTest(probe);
        final boolean got = actual.membershipTest(probe);
        Assert.assertEquals(want, got);
    }
}
 
Example #10
Source File: BloomAndUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code expected} and {@code actual} give the same membership answer for
 * {@code size} keys generated from a {@link Random} seeded with {@code seed}.
 *
 * <p>NOTE(review): despite the name, this does not assert that keys are absent from
 * {@code actual}; it only asserts that both filters agree on each key. Confirm whether
 * that is the intended check.
 *
 * @param expected reference filter
 * @param actual filter under test
 * @param seed seed of the random generator that produces the probe keys
 * @param size number of probe keys to check
 */
private static void assertNotContains(@Nonnull Filter expected, @Nonnull Filter actual,
        long seed, int size) {
    final Key probe = new Key();
    final Random generator = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        // Each probe key is the hex-string form of the next Gaussian sample.
        final String hex = Double.toHexString(generator.nextGaussian());
        probe.set(hex.getBytes(), 1.0);

        final boolean want = expected.membershipTest(probe);
        final boolean got = actual.membershipTest(probe);
        Assert.assertEquals(want, got);
    }
}
 
Example #11
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Populates {@code dst} from the serialized filter held in {@code in}, delegating to the
 * {@code (byte[], int, int, F)} overload with offset 0 and the text's reported length.
 *
 * @param in text holding the serialized filter
 * @param dst the filter instance to fill in
 * @param <F> concrete filter type
 * @return whatever the delegated overload returns
 * @throws IOException if the delegated overload fails
 */
@Nonnull
public static <F extends Filter> F deserialize(@Nonnull final Text in, @Nonnull final F dst)
        throws IOException {
    final byte[] raw = in.getBytes();
    // Use getLength() rather than raw.length as the slice size.
    final int validLength = in.getLength();
    return deserialize(raw, 0, validLength, dst);
}
 
Example #12
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Populates {@code dst} from the whole of {@code buf}, delegating to the
 * {@code (byte[], int, int, F)} overload with offset 0 and length {@code buf.length}.
 *
 * @param buf array holding the serialized filter
 * @param dst the filter instance to fill in
 * @param <F> concrete filter type
 * @return whatever the delegated overload returns
 * @throws IOException if the delegated overload fails
 */
@Nonnull
public static <F extends Filter> F deserialize(@Nonnull final byte[] buf, @Nonnull final F dst)
        throws IOException {
    final int wholeLength = buf.length;
    return deserialize(buf, 0, wholeLength, dst);
}
 
Example #13
Source File: BloomMapFile.java    From hadoop with Apache License 2.0 2 votes vote down vote up
/**
 * Gives access to the Bloom filter that this Reader instance uses.
 *
 * @return the {@link Filter} currently held in the {@code bloomFilter} field
 */
public Filter getBloomFilter() {
  return this.bloomFilter;
}
 
Example #14
Source File: BloomMapFile.java    From big-c with Apache License 2.0 2 votes vote down vote up
/**
 * Gives access to the Bloom filter that this Reader instance uses.
 *
 * @return the {@link Filter} currently held in the {@code bloomFilter} field
 */
public Filter getBloomFilter() {
  return this.bloomFilter;
}
 
Example #15
Source File: BloomMapFile.java    From RDFS with Apache License 2.0 2 votes vote down vote up
/**
 * Gives access to the Bloom filter that this Reader instance uses.
 *
 * @return the {@link Filter} currently held in the {@code bloomFilter} field
 */
public Filter getBloomFilter() {
  return this.bloomFilter;
}
 
Example #16
Source File: BloomMapFile.java    From hadoop-gpu with Apache License 2.0 2 votes vote down vote up
/**
 * Gives access to the Bloom filter that this Reader instance uses.
 *
 * @return the {@link Filter} currently held in the {@code bloomFilter} field
 */
public Filter getBloomFilter() {
  return this.bloomFilter;
}