Java Code Examples for com.google.common.hash.Hashing#murmur3_128()

The following examples show how to use com.google.common.hash.Hashing#murmur3_128(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FactDistinctColumnsMapper.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Initialises the calculator from the supplied cuboid description and selects the hash
 * function matching the requested row-key-to-HLL algorithm.
 *
 * <p>The old algorithm uses 32-bit murmur3; the new algorithm uses 128-bit murmur3 and
 * additionally allocates one long hash slot per row-key column. The worker thread wrapping
 * this instance is created here but not started.
 *
 * @param id identifier stored on this calculator
 * @param rowkeyColIndex row-key column indexes; its length becomes the row-key count
 * @param cuboidIds cuboid ids stored on this calculator
 * @param cuboidsBitSet per-cuboid bit sets stored on this calculator
 * @param isUsePutRowKeyToHllNewAlgorithm {@code true} to select the new (128-bit) algorithm
 * @param cuboidsHLL HLL counters stored on this calculator
 */
public CuboidStatCalculator(int id, int[] rowkeyColIndex, Long[] cuboidIds, Integer[][] cuboidsBitSet,
        boolean isUsePutRowKeyToHllNewAlgorithm, HLLCounter[] cuboidsHLL) {
    // Plain copies of the constructor arguments.
    this.id = id;
    this.nRowKey = rowkeyColIndex.length;
    this.rowkeyColIndex = rowkeyColIndex;
    this.cuboidIds = cuboidIds;
    this.cuboidsBitSet = cuboidsBitSet;
    this.cuboidsHLL = cuboidsHLL;
    this.isNewAlgorithm = isUsePutRowKeyToHllNewAlgorithm;

    // Hash function (and, for the new algorithm, the per-row-key hash buffer).
    if (isNewAlgorithm) {
        rowHashCodesLong = new long[nRowKey];
        this.hf = Hashing.murmur3_128();
    } else {
        this.hf = Hashing.murmur3_32();
    }

    workThread = new Thread(this);
}
 
Example 2
Source File: Hasher.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the named algorithm to a hash function and stores it in {@code hash_func}.
 *
 * Names of the form {@code good-N} select {@code Hashing.goodFastHash(N)}; every other
 * supported name maps to one fixed hash function.
 * See the Hasher class docs for a list of algorithms and guidance on selection.
 *
 * @param algorithm the hash algorithm to use
 * @throws IllegalArgumentException when the name matches no supported algorithm (this also
 *         covers the NumberFormatException raised when the text after "good-" is not an integer)
 * @throws RuntimeException when a hash function has already been set on this instance
 */
private void makeHashFunc(String algorithm) throws IllegalArgumentException, RuntimeException
{
  if (hash_func != null) {
    throw new RuntimeException("The hash function should only be set once per instance");
  }

  // Parameterised family first: "good-<bits>".
  if (algorithm.startsWith("good-")) {
    hash_func = Hashing.goodFastHash(Integer.parseInt(algorithm.substring(5)));
    return;
  }

  // Fixed-name algorithms. algorithm is known to be non-null here because startsWith
  // above would already have thrown otherwise.
  if ("murmur3-32".equals(algorithm)) {
    hash_func = Hashing.murmur3_32();
  } else if ("murmur3-128".equals(algorithm)) {
    hash_func = Hashing.murmur3_128();
  } else if ("sip24".equals(algorithm)) {
    hash_func = Hashing.sipHash24();
  } else if ("sha1".equals(algorithm)) {
    hash_func = Hashing.sha1();
  } else if ("sha256".equals(algorithm)) {
    hash_func = Hashing.sha256();
  } else if ("sha512".equals(algorithm)) {
    hash_func = Hashing.sha512();
  } else if ("md5".equals(algorithm)) {
    hash_func = Hashing.md5();
  } else if ("adler32".equals(algorithm)) {
    hash_func = Hashing.adler32();
  } else if ("crc32".equals(algorithm)) {
    hash_func = Hashing.crc32();
  } else {
    throw new IllegalArgumentException("No hash function found for algorithm "+algorithm+". Allowed values include "+HASH_NAMES);
  }
}
 
Example 3
Source File: TestMurmur3.java    From hyperloglog with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that Murmur3.hash128 agrees with Guava's seeded murmur3_128 on 1000
 * pseudo-random long values (fixed seed, so the run is reproducible).
 */
@Test
public void testHashCodesM3_128_longs() {
  final int seed = 123;
  final Random rng = new Random(seed);
  final HashFunction guavaMurmur = Hashing.murmur3_128(seed);

  for (int round = 0; round < 1000; round++) {
    final byte[] input = ByteBuffer.allocate(8).putLong(rng.nextLong()).array();

    // guava stores the hashcodes in little endian order
    final ByteBuffer guavaBytes = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
    guavaBytes.put(guavaMurmur.hashBytes(input).asBytes());
    guavaBytes.flip();
    final long guavaFirst = guavaBytes.getLong();
    final long guavaSecond = guavaBytes.getLong(8);

    final long[] ours = Murmur3.hash128(input, input.length, seed);
    final long oursFirst = ours[0];
    final long oursSecond = ours[1];

    assertEquals(guavaFirst, oursFirst);
    assertEquals(guavaSecond, oursSecond);
  }
}
 
Example 4
Source File: HashUtils.java    From MHAP with Apache License 2.0 6 votes vote down vote up
/**
 * Hashes every n-gram of {@code seq} {@code numWords} times with seeded murmur3_128.
 *
 * Entry {@code [i][w]} is {@code HashCode.asLong()} of the hash over the unencoded chars of
 * the n-gram starting at position {@code i}, followed by the int {@code w}.
 *
 * @param seq the sequence to split into overlapping n-grams
 * @param nGramSize the length of each n-gram
 * @param numWords the number of hash words to produce per n-gram
 * @param seed the murmur3 seed
 * @return an array with {@code seq.length() - nGramSize + 1} rows of {@code numWords} hashes
 */
public final static long[][] computeNGramHashesExact(final String seq, final int nGramSize, final int numWords, final int seed)
{
	final HashFunction murmur = Hashing.murmur3_128(seed);

	final int numGrams = seq.length() - nGramSize + 1;
	final long[][] result = new long[numGrams][numWords];

	for (int start = 0; start < numGrams; start++)
	{
		final String gram = seq.substring(start, start + nGramSize);
		final long[] row = result[start];

		// The word index is mixed into the hash so each word differs for the same n-gram.
		for (int w = 0; w < numWords; w++)
		{
			row[w] = murmur.newHasher().putUnencodedChars(gram).putInt(w).hash().asLong();
		}
	}

	return result;
}
 
Example 5
Source File: HashUtils.java    From MHAP with Apache License 2.0 6 votes vote down vote up
/**
 * Hashes every n-gram of {@code seq} once with seeded murmur3_128.
 *
 * When {@code doReverseCompliment} is set, each n-gram is replaced by {@code Utils.rc(nGram)}
 * whenever that string compares lexicographically smaller than the n-gram itself.
 *
 * @param seq the sequence to split into overlapping n-grams
 * @param nGramSize the length of each n-gram
 * @param seed the murmur3 seed
 * @param doReverseCompliment whether to hash the smaller of the n-gram and its {@code Utils.rc} form
 * @return {@code seq.length() - nGramSize + 1} values, one {@code HashCode.asLong()} per n-gram
 */
public final static long[] computeSequenceHashesLong(final String seq, final int nGramSize, final int seed, final boolean doReverseCompliment)
{
	final HashFunction murmur = Hashing.murmur3_128(seed);

	final int numGrams = seq.length() - nGramSize + 1;
	final long[] result = new long[numGrams];

	for (int start = 0; start < numGrams; start++)
	{
		String gram = seq.substring(start, start + nGramSize);

		if (doReverseCompliment)
		{
			final String reversed = Utils.rc(gram);
			if (reversed.compareTo(gram) < 0)
			{
				gram = reversed;
			}
		}

		result[start] = murmur.newHasher().putUnencodedChars(gram).hash().asLong();
	}

	return result;
}
 
Example 6
Source File: BinaryParser.java    From BUbiNG with Apache License 2.0 5 votes vote down vote up
/** Looks up a hash function by the (case-insensitive) name of its message-digest algorithm.
 *
 * @param messageDigest a message-digest algorithm (e.g., <code>MurmurHash3</code> or <code>MD5</code>), or the empty string.
 * @return the matching hash function; {@code null} if {@code messageDigest} is the empty string.
 * @throws NoSuchAlgorithmException if {@code messageDigest} is neither empty nor a known algorithm name.
 */
@SuppressWarnings("deprecation")
public final static HashFunction forName(final String messageDigest) throws NoSuchAlgorithmException {
	if ("".equals(messageDigest)) {
		// The empty string explicitly means "no hash function".
		return null;
	} else if ("MD5".equalsIgnoreCase(messageDigest)) {
		return Hashing.md5();
	} else if ("MurmurHash3".equalsIgnoreCase(messageDigest)) {
		return Hashing.murmur3_128();
	} else {
		throw new NoSuchAlgorithmException("Unknown hash function " + messageDigest);
	}
}
 
Example 7
Source File: CustomJarOutputStreamTest.java    From buck with Apache License 2.0 5 votes vote down vote up
/**
 * Writes one entry through a murmur3_128 HashingInputStream and checks that the hash
 * asserted for that entry equals the hash the stream computed.
 */
@Test
public void manifestContainsEntryHashesOfHashedEntries() throws IOException {
  final String name = "A";
  final byte[] payload = "contents".getBytes(StandardCharsets.UTF_8);
  final InputStream rawStream = new ByteArrayInputStream(payload);

  try (HashingInputStream hashedStream =
      new HashingInputStream(Hashing.murmur3_128(), rawStream)) {
    writer.writeEntry(name, hashedStream);
    writer.close();

    // The hash is read only after the writer has been closed.
    final String hashOfContents = hashedStream.hash().toString();
    assertEntryHash(name, hashOfContents);
  }
}
 
Example 8
Source File: CustomJarOutputStreamTest.java    From buck with Apache License 2.0 5 votes vote down vote up
/**
 * Adds an entry with no content and checks that the hash asserted for it equals the
 * murmur3_128 hash of an empty (never read) stream.
 */
@Test
public void manifestContainsEntryHashesOfEmptyHashedEntries() throws IOException {
  final String name = "A";
  final InputStream emptyStream = new ByteArrayInputStream(new byte[0]);

  try (HashingInputStream hashedStream =
      new HashingInputStream(Hashing.murmur3_128(), emptyStream)) {
    // The entry is opened and closed directly; nothing is read from hashedStream.
    writer.putNextEntry(new CustomZipEntry(name));
    writer.closeEntry();
    writer.close();

    final String hashOfNothing = hashedStream.hash().toString();
    assertEntryHash(name, hashOfNothing);
  }
}
 
Example 9
Source File: TestMurmur3.java    From hyperloglog with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that Murmur3.hash128 agrees with Guava's seeded murmur3_128 for two string keys:
 * "test" and "testkey128_testkey128".
 */
@Test
public void testHashCodesM3_128_string() {
  final int seed = 123;
  final HashFunction guavaMurmur = Hashing.murmur3_128(seed);

  for (String key : new String[] {"test", "testkey128_testkey128"}) {
    // guava stores the hashcodes in little endian order
    final ByteBuffer guavaBytes = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
    guavaBytes.put(guavaMurmur.hashBytes(key.getBytes()).asBytes());
    guavaBytes.flip();
    final long guavaFirst = guavaBytes.getLong();
    final long guavaSecond = guavaBytes.getLong(8);

    final long[] ours = Murmur3.hash128(key.getBytes(), key.getBytes().length, seed);
    final long oursFirst = ours[0];
    final long oursSecond = ours[1];

    assertEquals(guavaFirst, oursFirst);
    assertEquals(guavaSecond, oursSecond);
  }
}
 
Example 10
Source File: TargetsCommand.java    From buck with Apache License 2.0 5 votes vote down vote up
/**
 * Maps the configured {@code targetHashFunction} to the corresponding hash function.
 *
 * @return SHA-1 for {@code SHA1}, 128-bit murmur3 for {@code MURMUR_HASH3}
 * @throws UnsupportedOperationException for any other value
 */
private HashFunction getHashFunction() {
  final HashFunction selected;
  switch (targetHashFunction) {
    case SHA1:
      selected = Hashing.sha1();
      break;
    case MURMUR_HASH3:
      selected = Hashing.murmur3_128();
      break;
    default:
      throw new UnsupportedOperationException();
  }
  return selected;
}
 
Example 11
Source File: Function0.java    From paraflow with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the function from its JSON properties and builds a murmur3_128 hash function
 * seeded with {@code seed}.
 *
 * @param seed the seed stored on this function and used for the hash function
 * @param fiberNum the fiber count stored on this function
 */
@JsonCreator
public Function0(@JsonProperty("seed") int seed,
                 @JsonProperty("fiberNum") int fiberNum)
{
    this.hasher = Hashing.murmur3_128(seed);
    this.fiberNum = fiberNum;
    this.seed = seed;
}
 
Example 12
Source File: Segment.java    From emodb with Apache License 2.0 5 votes vote down vote up
/**
 * Hashes the remaining bytes of {@code buf} with murmur3_128.
 *
 * Buffers backed by an accessible array are hashed in place from the current position;
 * all others are first converted to an array through {@code ByteBufferUtil.getArray}.
 *
 * @param buf the buffer whose content is hashed
 * @return the hash code of the buffer content
 */
private HashCode hash(ByteBuffer buf) {
    final HashFunction murmur = Hashing.murmur3_128();

    if (!buf.hasArray()) {
        return murmur.hashBytes(ByteBufferUtil.getArray(buf));
    }

    final byte[] backing = buf.array();
    final int start = buf.arrayOffset() + buf.position();
    return murmur.hashBytes(backing, start, buf.remaining());
}
 
Example 13
Source File: StatsField.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** 
 * Creates an HllOptions based on the (local) params specified (if appropriate).
 *
 * <p>The param named by <code>Stat.cardinality</code> may hold a number between 0 and 1, which is
 * used as a scaling factor for both log2m and regwidth, or a non-numeric value that is read as a
 * boolean. Explicit <code>hllLog2m</code> and <code>hllRegwidth</code> params override whatever
 * was derived from it, and the final values are range-checked against the HLL limits.
 *
 * @param localParams the LocalParams for this {@link StatsField}
 * @param field the field corresponding to this {@link StatsField}, may be null if these stats are over a value source
 * @return the {@link HllOptions} to use based on the params, or null if no {@link HLL} should be computed
 *         (the cardinality param is blank, or is not a number and reads as boolean false)
 * @throws SolrException if there are invalid options: a numeric accuracy outside [0, 1], a log2m or
 *         regwidth outside the legal HLL range, or <code>hllPreHashed</code> requested when
 *         <code>field</code> is null or its number type is neither LONG nor DATE
 */
public static HllOptions parseHllOptions(SolrParams localParams, SchemaField field) 
  throws SolrException {

  String cardinalityOpt = localParams.get(Stat.cardinality.name());
  if (StringUtils.isBlank(cardinalityOpt)) {
    // cardinality was not requested at all
    return null;
  }

  final NumberType hashableNumType = getHashableNumericType(field);

  // some sane defaults
  int log2m = 13;   // roughly equivalent to "cardinality='0.33'"
  int regwidth = 6; // with decent hash, this is plenty for all valid long hashes

  if (NumberType.FLOAT.equals(hashableNumType) || NumberType.INTEGER.equals(hashableNumType)) {
    // for 32bit values, we can adjust our default regwidth down a bit
    regwidth--;

    // NOTE: EnumField uses LegacyNumericType.INT, and in theory we could be super conservative
    // with it, but there's no point - just let the EXPLICIT HLL handle it
  }

  // TODO: we could attempt additional reductions in the default regwidth based on index
  // statistics -- but that doesn't seem worth the effort.  for tiny indexes, the
  // EXPLICIT and SPARSE HLL representations have us nicely covered, and in general we don't
  // want to be too aggressive about lowering regwidth or we could get really poor results if
  // log2m is also low and there is heavy hashkey collision

  try {
    // NFE will short out here if it's not a number
    final double accuracyOpt = Double.parseDouble(cardinalityOpt);

    // if a float between 0 and 1 is specified, treat it as a preference of accuracy
    // - 0 means accuracy is not a concern, save RAM
    // - 1 means be as accurate as possible, using as much RAM as needed.

    if (accuracyOpt < 0D || 1.0D < accuracyOpt) {
      throw new SolrException(ErrorCode.BAD_REQUEST, ERR);
    }

    // use accuracyOpt as a scaling factor between min & max legal log2m values
    log2m = HLL.MINIMUM_LOG2M_PARAM
      + (int) Math.round(accuracyOpt * (HLL.MAXIMUM_LOG2M_PARAM - HLL.MINIMUM_LOG2M_PARAM));

    // use accuracyOpt as a scaling factor for regwidth as well, BUT...
    // be more conservative -- HLL.MINIMUM_REGWIDTH_PARAM is too absurdly low to be useful
    // use previously computed (hashableNumType) default regwidth -1 as lower bound for scaling
    final int MIN_HUERISTIC_REGWIDTH = regwidth-1;
    regwidth = MIN_HUERISTIC_REGWIDTH
      + (int) Math.round(accuracyOpt * (HLL.MAXIMUM_REGWIDTH_PARAM - MIN_HUERISTIC_REGWIDTH));

  } catch (NumberFormatException nfe) {
    // param value isn't a number -- let's check for simple true/false
    // (the defaults chosen above stay in effect when it reads as true)
    if (! localParams.getBool(Stat.cardinality.name(), false)) {
      return null;
    }
  }

  // let explicit params override both the default and/or any accuracy specification
  log2m = localParams.getInt("hllLog2m", log2m);
  regwidth = localParams.getInt("hllRegwidth", regwidth);

  // validate legal values
  if (log2m < HLL.MINIMUM_LOG2M_PARAM || HLL.MAXIMUM_LOG2M_PARAM < log2m) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "hllLog2m must be at least " + 
                            HLL.MINIMUM_LOG2M_PARAM + " and at most " + HLL.MAXIMUM_LOG2M_PARAM
                            + " (" + log2m +")");
  }
  if (regwidth < HLL.MINIMUM_REGWIDTH_PARAM || HLL.MAXIMUM_REGWIDTH_PARAM < regwidth) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "hllRegwidth must be at least " + 
                            HLL.MINIMUM_REGWIDTH_PARAM + " and at most " + HLL.MAXIMUM_REGWIDTH_PARAM);
  }
  
  // a null hasher is how "values are already hashed" is passed on to HllOptions
  HashFunction hasher = localParams.getBool("hllPreHashed", false) ? null : Hashing.murmur3_128();

  if (null == hasher) {
    // if this is a function, or a non Long field, pre-hashed is invalid
    // NOTE: we ignore hashableNumType - it's LONG for non numerics like Strings
    // (DATE is accepted here alongside LONG, although the error message only mentions Long)
    if (null == field || !(NumberType.LONG.equals(field.getType().getNumberType()) || NumberType.DATE.equals(field.getType().getNumberType()))) { 
      throw new SolrException(ErrorCode.BAD_REQUEST, "hllPreHashed is only supported with Long based fields");
    }
  }

  // if we're still here, then we need an HLL...
  return new HllOptions(log2m, regwidth, hasher);
}
 
Example 14
Source File: HLLCounter.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a counter with the defaults hard-coded here, delegating to the three-argument
 * constructor with a first argument of 10 (presumably the precision p -- confirm against
 * that constructor), {@code RegisterType.SINGLE_VALUE} and {@code Hashing.murmur3_128()}.
 */
public HLLCounter() {
    this(10, RegisterType.SINGLE_VALUE, Hashing.murmur3_128());
}
 
Example 15
Source File: HyperLogLogPlusCounter.java    From Kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a counter for the given {@code p}, delegating to the two-argument constructor
 * with {@code Hashing.murmur3_128()} as the hash function.
 *
 * @param p passed through unchanged as the first constructor argument
 *          (presumably the HyperLogLog precision -- confirm against that constructor)
 */
public HyperLogLogPlusCounter(int p) {
    this(p, Hashing.murmur3_128());
}
 
Example 16
Source File: RendezvousHash.java    From xrpc with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a rendezvous hash that uses murmur3_128 and starts with the given nodes.
 *
 * @param nodeFunnel the funnel stored for feeding nodes into the hash function
 * @param init the initial nodes, all of which are added to the node list
 */
public RendezvousHash(Funnel<N> nodeFunnel, Collection<? extends N> init) {
  this.nodeFunnel = nodeFunnel;
  this.hasher = Hashing.murmur3_128();

  // Seed the node list with the initial members.
  nodeList.addAll(init);
}
 
Example 17
Source File: RendezvousHash.java    From xio with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a rendezvous hash that uses murmur3_128 and starts with the given nodes.
 *
 * @param nodeFunnel the funnel stored for feeding nodes into the hash function
 * @param init the initial nodes, all of which are added to the node list
 */
public RendezvousHash(Funnel<T> nodeFunnel, Collection<? extends T> init) {
  this.nodeFunnel = nodeFunnel;
  this.hasher = Hashing.murmur3_128();

  // Seed the node list with the initial members.
  nodeList.addAll(init);
}
 
Example 18
Source File: HLLCounterOld.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a counter for the given {@code p}, delegating to the two-argument constructor
 * with {@code Hashing.murmur3_128()} as the hash function.
 *
 * @param p passed through unchanged as the first constructor argument
 *          (presumably the HyperLogLog precision -- confirm against that constructor)
 */
public HLLCounterOld(int p) {
    this(p, Hashing.murmur3_128());
}
 
Example 19
Source File: HLLCounter.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a counter for the given {@code p} and register type, delegating to the
 * three-argument constructor with {@code Hashing.murmur3_128()} as the hash function.
 *
 * @param p passed through unchanged as the first constructor argument
 *          (presumably the HyperLogLog precision -- confirm against that constructor)
 * @param type the register type, passed through unchanged
 */
public HLLCounter(int p, RegisterType type) {
    this(p, type, Hashing.murmur3_128());
}
 
Example 20
Source File: HLLCounter.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a counter for the given {@code p}, delegating to the three-argument constructor
 * with {@code RegisterType.SINGLE_VALUE} and {@code Hashing.murmur3_128()}.
 *
 * @param p passed through unchanged as the first constructor argument
 *          (presumably the HyperLogLog precision -- confirm against that constructor)
 */
public HLLCounter(int p) {
    this(p, RegisterType.SINGLE_VALUE, Hashing.murmur3_128());
}