org.apache.hadoop.util.bloom.Key Java Examples

The following examples show how to use org.apache.hadoop.util.bloom.Key. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: InternalDynamicBloomFilter.java    From hudi with Apache License 2.0 6 votes vote down vote up
/**
 * Adds a key to the currently active standard Bloom filter. When
 * {@code getActiveStandardBF()} yields no filter, a new row is appended to
 * the matrix, the per-row record counter is reset, and the key goes into
 * that new last row.
 *
 * @param key the key to add
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(Key key) {
  if (key == null) {
    throw new NullPointerException("Key can not be null");
  }

  org.apache.hadoop.util.bloom.BloomFilter active = getActiveStandardBF();
  if (active != null) {
    active.add(key);
  } else {
    // No usable row: grow the matrix and start counting from zero again.
    addRow();
    active = matrix[matrix.length - 1];
    currentNbRecord = 0;
    active.add(key);
  }

  currentNbRecord++;
}
 
Example #2
Source File: BloomFilter.java    From hadoop-map-reduce-patterns with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the input record once if any whitespace-delimited token of its
 * "Text" attribute passes the Bloom filter membership test. Records with
 * a missing or empty "Text" attribute are skipped.
 *
 * NOTE(review): tokens are encoded with the platform default charset; this
 * presumably has to match the encoding used where the filter was built.
 */
@Override
public void map(Object key, Text value, Context context)
		throws IOException, InterruptedException {
	String body = transformXmlToMap(value.toString()).get("Text");
	if (isNullOrEmpty(body)) {
		return;
	}

	for (StringTokenizer tokens = new StringTokenizer(body); tokens.hasMoreTokens();) {
		Key candidate = new Key(tokens.nextToken().getBytes());
		if (filter.membershipTest(candidate)) {
			// One hit is enough: emit the record and stop scanning.
			context.write(value, NullWritable.get());
			return;
		}
	}
}
 
Example #3
Source File: BuildBloom.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a Bloom filter holding a single key derived from the first tuple
 * of the bag in field 0 of {@code input}, and returns the result of
 * {@code bloomOut()} wrapped in a new tuple.
 *
 * @param input tuple whose first field is a bag of tuples
 * @return a one-field tuple, or {@code null} when {@code input} is null or empty
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Unwrap the outer bag; only its first tuple is consumed here.
    DataBag bag = (DataBag) input.get(0);
    Tuple first = bag.iterator().next();

    // A single-field tuple is keyed by that field alone; a multi-field
    // tuple is serialized as a whole.
    byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);

    Key bloomKey = new Key(keyBytes);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(bloomKey);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
 
Example #4
Source File: DistinctAggregator.java    From compiler with Apache License 2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>Collects {@code data} only if its key does not already pass the
 * filter's membership test; the key is then added to the filter.
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
	// Encode with an explicit charset. The no-arg getBytes() uses the platform
	// default, under which distinct unmappable characters can collapse to the
	// same replacement byte and make different values look like duplicates.
	final Key key = new Key(data.getBytes(java.nio.charset.StandardCharsets.UTF_8));

	// if the key is already in the filter, forget it
	if (this.filter.membershipTest(key)) {
		return;
	}

	// remember the key, then emit the value
	this.filter.add(key);
	this.collect(data);
}
 
Example #5
Source File: UniqueAggregator.java    From compiler with Apache License 2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>Values whose key already passes the filter's membership test are
 * skipped. Otherwise the key is added and the value is either collected
 * (when {@code isCombining()} is true) or counted in {@code total}.
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
	// Encode with an explicit charset. The no-arg getBytes() uses the platform
	// default, under which distinct unmappable characters can collapse to the
	// same replacement byte and make different values look like duplicates.
	final Key key = new Key(data.getBytes(java.nio.charset.StandardCharsets.UTF_8));

	// if the key is already in the filter, forget about it
	if (this.filter.membershipTest(key)) {
		return;
	}

	// add the key to the bloom filter
	this.filter.add(key);

	if (this.isCombining()) {
		this.collect(data);
	} else {
		this.total++;
	}
}
 
Example #6
Source File: BloomContainsUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a dynamic Bloom filter (built with argument 30) and adds
 * {@code size} keys to it. Each key is the hex string of the next Gaussian
 * drawn from a {@link Random} seeded with {@code seed}.
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(30);
    final Key reusedKey = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        // One Key instance is refilled on every iteration.
        reusedKey.set(new Text(hex).copyBytes(), 1.0);
        result.add(reusedKey);
    }

    return result;
}
 
Example #7
Source File: BloomNotUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a dynamic Bloom filter (built with argument 3000) and adds
 * {@code size} keys to it. Each key is the hex string of the next Gaussian
 * drawn from a {@link Random} seeded with {@code seed}.
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key reusedKey = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        // One Key instance is refilled on every iteration.
        reusedKey.set(hex.getBytes(), 1.0);
        result.add(reusedKey);
    }

    return result;
}
 
Example #8
Source File: BloomOrUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a dynamic Bloom filter (built with argument 3000) and adds
 * {@code size} keys to it. Each key is the hex string of the next Gaussian
 * drawn from a {@link Random} seeded with {@code seed}.
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key reusedKey = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        // One Key instance is refilled on every iteration.
        reusedKey.set(hex.getBytes(), 1.0);
        result.add(reusedKey);
    }

    return result;
}
 
Example #9
Source File: BloomAndUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a dynamic Bloom filter (built with argument 3000) and adds
 * {@code size} keys to it. Each key is the hex string of the next Gaussian
 * drawn from a {@link Random} seeded with {@code seed}.
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter result = BloomFilterUtils.newDynamicBloomFilter(3000);
    final Key reusedKey = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        // One Key instance is refilled on every iteration.
        reusedKey.set(hex.getBytes(), 1.0);
        result.add(reusedKey);
    }

    return result;
}
 
Example #10
Source File: SimpleBloomFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a string key to the underlying filter, encoded as UTF-8 bytes.
 *
 * @param key the key to add; must not be {@code null}
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(String key) {
  if (key == null) {
    // Message typo fixed ("by" -> "be"); the exception type is unchanged.
    throw new NullPointerException("Key cannot be null");
  }
  filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
 
Example #11
Source File: ReduceSideJoinBloomFilter.java    From hadoop-map-reduce-patterns with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the record keyed by its "UserId" attribute, with the value
 * prefixed by "B", when that user id passes the Bloom filter membership
 * test. Records without a "UserId" attribute are skipped.
 *
 * NOTE(review): the id is encoded with the platform default charset; this
 * presumably has to match the encoding used where the filter was built.
 */
public void map(Object key, Text value, Context context)
		throws IOException, InterruptedException {
	String userId = MRDPUtils.transformXmlToMap(value.toString()).get("UserId");
	if (userId == null) {
		return;
	}

	Key candidate = new Key(userId.getBytes());
	if (!bfilter.membershipTest(candidate)) {
		return;
	}

	outkey.set(userId);
	outvalue.set("B" + value.toString());
	context.write(outkey, outvalue);
}
 
Example #12
Source File: Bloom.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Tests {@code input} against the Bloom filter, calling {@code init()}
 * first when the filter has not been set yet.
 *
 * @param input tuple to test; a single-field tuple is keyed by that field,
 *              any other tuple is serialized as a whole
 * @return the result of the filter's membership test
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] keyBytes = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(keyBytes));
}
 
Example #13
Source File: BloomFilterCreator.java    From hiped2 with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the user's name to the Bloom filter when the user's state is "CA".
 * Nothing is written to {@code context} here.
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  final User user = User.fromText(value);
  if (!"CA".equals(user.getState())) {
    return;
  }
  final byte[] nameBytes = user.getName().getBytes();
  filter.add(new Key(nameBytes));
}
 
Example #14
Source File: BloomJoin.java    From hiped2 with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the record keyed by user name when that name passes the Bloom
 * filter membership test. The emitted value carries the dataset id from
 * {@code getDataset()} and the raw record text.
 */
@Override
protected void map(LongWritable offset, Text value, Context context)
    throws IOException, InterruptedException {
  final String user = getUsername(value);
  final Key candidate = new Key(user.getBytes());
  if (!filter.membershipTest(candidate)) {
    return;
  }

  final Tuple outputValue = new Tuple();
  outputValue.setInt(ValueFields.DATASET, getDataset());
  outputValue.setString(ValueFields.DATA, value.toString());

  context.write(new Text(user), outputValue);
}
 
Example #15
Source File: BloomFilterCreator.java    From hiped2 with Apache License 2.0 5 votes vote down vote up
/**
 * Logs the key, adds it to the Bloom filter when the value parses to an
 * age above 30, and keeps a reference to {@code output} in
 * {@code collector}.
 *
 * @throws NumberFormatException if {@code value} is not a decimal integer
 */
@Override
public void map(Text key, Text value,
                OutputCollector<NullWritable, BloomFilter> output,
                Reporter reporter) throws IOException {

  System.out.println("K[" + key + "]");

  final int age = Integer.parseInt(value.toString());
  final boolean overThirty = age > 30;
  if (overThirty) {
    final byte[] keyBytes = key.toString().getBytes();
    filter.add(new Key(keyBytes));
  }

  // Nothing is emitted per record; the collector is only remembered here.
  collector = output;
}
 
Example #16
Source File: BloomJoin.java    From hiped2 with Apache License 2.0 5 votes vote down vote up
/**
 * Logs the key and forwards the key/value pair unchanged when the key
 * passes the Bloom filter membership test.
 */
@Override
protected void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
  System.out.println("K[" + key + "]");

  final Key candidate = new Key(key.toString().getBytes());
  if (!filter.membershipTest(candidate)) {
    return;
  }
  context.write(key, value);
}
 
Example #17
Source File: BloomOrUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code expected} and {@code actual} give the same
 * membership answer for each of {@code size} keys. Each key is the hex
 * string of the next Gaussian drawn from a {@link Random} seeded with
 * {@code seed}.
 */
private static void assertEquals(@Nonnull Filter expected, @Nonnull Filter actual, long seed,
        int size) {
    final Key probe = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        probe.set(hex.getBytes(), 1.0);

        final boolean wanted = expected.membershipTest(probe);
        final boolean got = actual.membershipTest(probe);
        Assert.assertEquals(wanted, got);
    }
}
 
Example #18
Source File: SimpleBloomFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Tests whether the filter reports the given string key as present, using
 * the key's UTF-8 bytes.
 *
 * @param key the key to test; must not be {@code null}
 * @return the result of the underlying filter's membership test
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public boolean mightContain(String key) {
  if (key == null) {
    // Message typo fixed ("by" -> "be"); the exception type is unchanged.
    throw new NullPointerException("Key cannot be null");
  }
  return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
 
Example #19
Source File: InternalDynamicBloomFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Tests the key against every filter in the matrix.
 *
 * @param key the key to test
 * @return {@code true} if {@code key} is {@code null} or any filter in the
 *         matrix reports membership; {@code false} otherwise
 */
@Override
public boolean membershipTest(Key key) {
  // A null key is reported as present rather than rejected.
  if (key != null) {
    for (BloomFilter row : matrix) {
      if (row.membershipTest(key)) {
        return true;
      }
    }
    return false;
  }
  return true;
}
 
Example #20
Source File: InternalFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds every key of an array to <i>this</i> filter, in index order, by
 * calling {@code add(Key)} once per element.
 *
 * @param keys The array of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Key[] keys) {
  if (keys == null) {
    throw new IllegalArgumentException("Key[] may not be null");
  }
  for (int i = 0; i < keys.length; i++) {
    add(keys[i]);
  }
}
 
Example #21
Source File: InternalFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds every key of a collection to <i>this</i> filter, in iteration
 * order, by calling {@code add(Key)} once per element.
 *
 * @param keys The collection of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Collection<Key> keys) {
  if (keys != null) {
    for (final Key each : keys) {
      this.add(each);
    }
    return;
  }
  throw new IllegalArgumentException("Collection<Key> may not be null");
}
 
Example #22
Source File: InternalFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds every key of a list to <i>this</i> filter, in list order, by
 * calling {@code add(Key)} once per element.
 *
 * @param keys The list of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(List<Key> keys) {
  if (keys == null) {
    // The parameter is any List, not specifically an ArrayList, so the
    // message names the declared type.
    throw new IllegalArgumentException("List<Key> may not be null");
  }

  for (Key key : keys) {
    add(key);
  }
}
 
Example #23
Source File: BloomAndUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that {@code expected} and {@code actual} give the same
 * membership answer for each of {@code size} keys. Each key is the hex
 * string of the next Gaussian drawn from a {@link Random} seeded with
 * {@code seed}.
 *
 * NOTE(review): the method name suggests a non-membership check, but the
 * body asserts that both filters agree; confirm which is intended.
 */
private static void assertNotContains(@Nonnull Filter expected, @Nonnull Filter actual,
        long seed, int size) {
    final Key probe = new Key();
    final Random gaussians = new Random(seed);

    for (int remaining = size; remaining > 0; remaining--) {
        final String hex = Double.toHexString(gaussians.nextGaussian());
        probe.set(hex.getBytes(), 1.0);

        final boolean wanted = expected.membershipTest(probe);
        final boolean got = actual.membershipTest(probe);
        Assert.assertEquals(wanted, got);
    }
}
 
Example #24
Source File: BloomFilterUDAF.java    From incubator-hivemall with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes this instance's state: assigns a new dynamic Bloom filter
 * from {@code BloomFilterUtils} and a new empty {@code Key}.
 */
@Override
public void init() {
    filter = BloomFilterUtils.newDynamicBloomFilter();
    key = new Key();
}
 
Example #25
Source File: HoodieDynamicBoundedBloomFilter.java    From hudi with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the key's UTF-8 bytes against the internal dynamic Bloom filter.
 *
 * @param key the key to test
 * @return the result of the internal filter's membership test
 */
@Override
public boolean mightContain(String key) {
  final byte[] utf8 = key.getBytes(StandardCharsets.UTF_8);
  final Key probe = new Key(utf8);
  return internalDynamicBloomFilter.membershipTest(probe);
}
 
Example #26
Source File: HoodieDynamicBoundedBloomFilter.java    From hudi with Apache License 2.0 4 votes vote down vote up
/**
 * Adds the key's UTF-8 bytes to the internal dynamic Bloom filter.
 *
 * @param key the key to add
 */
@Override
public void add(String key) {
  final byte[] utf8 = key.getBytes(StandardCharsets.UTF_8);
  final Key entry = new Key(utf8);
  internalDynamicBloomFilter.add(entry);
}
 
Example #27
Source File: InternalFilter.java    From hudi with Apache License 2.0 2 votes vote down vote up
/**
 * Determines whether a specified key belongs to <i>this</i> filter.
 *
 * @param key The key to test.
 * @return boolean True if the specified key belongs to <i>this</i> filter. False otherwise.
 */
public abstract boolean membershipTest(Key key);
 
Example #28
Source File: InternalFilter.java    From hudi with Apache License 2.0 2 votes vote down vote up
/**
 * Adds a key to <i>this</i> filter.
 *
 * <p>The array, collection, and list overloads of {@code add} call this
 * method once per element.
 *
 * @param key The key to add.
 */
public abstract void add(Key key);