Java Code Examples for com.google.common.hash.Hashing#murmur3_32()

The following examples show how to use com.google.common.hash.Hashing#murmur3_32(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: HashingUtil.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the {@code HashFunction} that corresponds to a {@code HashType} constant.
 *
 * @param hashType the algorithm to resolve
 * @return the hash function obtained from {@code Hashing} for that algorithm
 * @throws IllegalArgumentException if {@code hashType} is not one of the cases handled here
 */
public static HashFunction getHasher(HashType hashType) {
  final HashFunction hasher;
  switch (hashType) {
    case MURMUR3_128:
      hasher = Hashing.murmur3_128();
      break;
    case MURMUR3_32:
      hasher = Hashing.murmur3_32();
      break;
    case SIPHASH24:
      hasher = Hashing.sipHash24();
      break;
    case MD5:
      hasher = Hashing.md5();
      break;
    case SHA1:
      hasher = Hashing.sha1();
      break;
    case SHA256:
      hasher = Hashing.sha256();
      break;
    case SHA512:
      hasher = Hashing.sha512();
      break;
    case ADLER32:
      hasher = Hashing.adler32();
      break;
    case CRC32:
      hasher = Hashing.crc32();
      break;
    case CRC32C:
      hasher = Hashing.crc32c();
      break;
    default:
      throw new IllegalArgumentException(Utils.format("Unsupported Hashing Algorithm: {}", hashType.name()));
  }
  return hasher;
}
 
Example 2
Source File: SerializableSaltedHasher.java    From CuckooFilter4J with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the hash function for the requested algorithm.
 *
 * @param alg         the algorithm to instantiate
 * @param seedNSalt   seed value; passed whole to xxHash64 and sipHash24, and narrowed to
 *                    {@code int} for the Murmur3 variants
 * @param addlSipSeed second seed value, used only by sipHash24
 * @return the configured hash function
 * @throws IllegalArgumentException if {@code alg} is not one of the cases handled here
 */
private static HashFunction configureHash(Algorithm alg, long seedNSalt, long addlSipSeed) {
	switch (alg) {
	case xxHash64:
		return new xxHashFunction(seedNSalt);
	case Murmur3_128:
		// The cast keeps only the low 32 bits of the seed.
		return Hashing.murmur3_128((int) seedNSalt);
	case Murmur3_32:
		return Hashing.murmur3_32((int) seedNSalt);
	case sha256:
		// Fixed: this case used to return Hashing.sha1(), which is not the selected algorithm.
		// Neither seed argument is used for this algorithm here.
		// NOTE(review): data hashed under the old SHA-1 behaviour will not match; confirm
		// whether any persisted state depends on it.
		return Hashing.sha256();
	case sipHash24:
		return Hashing.sipHash24(seedNSalt, addlSipSeed);
	default:
		throw new IllegalArgumentException("Invalid Enum Hashing Algorithm???");
	}
}
 
Example 3
Source File: EntryPointDeduplicator.java    From burp-molly-scanner with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Checks whether the body of the given response has probably been seen before.
 *
 * The response body (everything from the body offset onwards) is hashed with Murmur3 32-bit and
 * the hash string is looked up in {@code dubBloomFilter}. A body that is not yet recorded is
 * added to the filter so that later calls report it as a duplicate.
 *
 * @param messageInfo the request/response pair whose response body is checked
 * @return {@code true} if the filter reports that it might already contain the body hash;
 *         {@code false} if there is no filter, the response has no body, or the body is new
 */
public boolean isFullDuplicate(IHttpRequestResponse messageInfo) {
    // Without a filter there is nothing to compare against, so skip parsing the response.
    if (dubBloomFilter == null) {
        return false;
    }

    byte[] response = messageInfo.getResponse();
    IResponseInfo respInfo = helpers.analyzeResponse(response);

    // Decode the response once and reuse it for both the length check and the substring.
    String responseText = helpers.bytesToString(response);
    int bodyOffset = respInfo.getBodyOffset();
    if (responseText.length() <= bodyOffset) {
        return false;
    }

    /* full-dub detection */
    String body = responseText.substring(bodyOffset);
    HashFunction murmur = Hashing.murmur3_32();
    String dedupHashValue = murmur.hashBytes(helpers.stringToBytes(body)).toString();
    if (dubBloomFilter.mightContain(dedupHashValue)) {
        return true;
    }
    dubBloomFilter.put(dedupHashValue);
    return false;
}
 
Example 4
Source File: Hasher.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Selects the HashFunction identified by the given algorithm name and stores it in hash_func.
 *
 * Names of the form "good-N" select Hashing.goodFastHash(N); every other supported name maps
 * to a fixed Guava hash function. See the Hasher class docs for a list of algorithms and
 * guidance on selection.
 *
 * @param algorithm the hash algorithm to use
 * @throws IllegalArgumentException if the name is not recognised, or if the number following
 *         "good-" cannot be parsed as an integer
 * @throws RuntimeException if a hash function has already been set on this instance
 */
private void makeHashFunc(String algorithm) throws IllegalArgumentException, RuntimeException
{
  if (hash_func != null) {
    throw new RuntimeException("The hash function should only be set once per instance");
  }

  final HashFunction selected;
  if (algorithm.startsWith("good-")) {
    // Everything after the "good-" prefix is the requested minimum bit count.
    selected = Hashing.goodFastHash(Integer.parseInt(algorithm.substring(5)));
  } else if (algorithm.equals("murmur3-32")) {
    selected = Hashing.murmur3_32();
  } else if (algorithm.equals("murmur3-128")) {
    selected = Hashing.murmur3_128();
  } else if (algorithm.equals("sip24")) {
    selected = Hashing.sipHash24();
  } else if (algorithm.equals("sha1")) {
    selected = Hashing.sha1();
  } else if (algorithm.equals("sha256")) {
    selected = Hashing.sha256();
  } else if (algorithm.equals("sha512")) {
    selected = Hashing.sha512();
  } else if (algorithm.equals("md5")) {
    selected = Hashing.md5();
  } else if (algorithm.equals("adler32")) {
    selected = Hashing.adler32();
  } else if (algorithm.equals("crc32")) {
    selected = Hashing.crc32();
  } else {
    throw new IllegalArgumentException("No hash function found for algorithm "+algorithm+". Allowed values include "+HASH_NAMES);
  }

  hash_func = selected;
}
 
Example 5
Source File: HashUtils.java    From MHAP with Apache License 2.0 6 votes vote down vote up
/**
 * Hashes every substring of length {@code nGramSize} in {@code seq} with Murmur3 32-bit (seed 0).
 *
 * When {@code doReverseCompliment} is set, each substring is compared with the string returned
 * by {@code Utils.rc} (presumably its reverse complement) and the lexicographically smaller of
 * the two is the one that gets hashed.
 *
 * @param seq                 the sequence to scan
 * @param nGramSize           length of each substring
 * @param doReverseCompliment whether to hash the smaller of the substring and its Utils.rc form
 * @return one hash per substring start position, {@code seq.length() - nGramSize + 1} in total
 */
public final static int[] computeSequenceHashes(final String seq, final int nGramSize, boolean doReverseCompliment)
{
	final HashFunction murmur = Hashing.murmur3_32(0);
	final int[] result = new int[seq.length() - nGramSize + 1];

	int start = 0;
	while (start < result.length)
	{
		String gram = seq.substring(start, start + nGramSize);

		if (doReverseCompliment)
		{
			final String flipped = Utils.rc(gram);
			if (flipped.compareTo(gram) < 0)
			{
				gram = flipped;
			}
		}

		result[start] = murmur.newHasher().putUnencodedChars(gram).hash().asInt();
		start++;
	}

	return result;
}
 
Example 6
Source File: FailLogger.java    From hugegraph-loader with Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites {@code this.file} without duplicated data lines.
 *
 * The file is read as pairs of lines (a tips line followed by a data line). Each pair is copied
 * to a sibling file named with {@code Constants.DEDUP_SUFFIX} unless a data line with the same
 * Murmur3 32-bit hash has already been written. The sibling file is then renamed over the
 * original.
 *
 * @throws LoadException if reading or writing fails, or if the rename fails
 */
private void removeDupLines() {
    Charset charset = Charset.forName(this.struct.input().charset());
    File dedupFile = new File(this.file.getAbsolutePath() +
                               Constants.DEDUP_SUFFIX);
    try (InputStream is = new FileInputStream(this.file);
         Reader ir = new InputStreamReader(is, charset);
         BufferedReader reader = new BufferedReader(ir);
         // upper is input, below is output
         OutputStream os = new FileOutputStream(dedupFile);
         Writer ow = new OutputStreamWriter(os, charset);
         BufferedWriter writer = new BufferedWriter(ow)) {
        Set<Integer> writtenHashes = new HashSet<>();
        HashFunction hashFunc = Hashing.murmur3_32();
        String tipsLine;
        String dataLine;
        // Stop at end of input, including a trailing tips line that has no data line.
        while ((tipsLine = reader.readLine()) != null &&
               (dataLine = reader.readLine()) != null) {
            /*
             * Hash data line to remove duplicate lines
             * Misjudgment may occur, but the probability is extremely low
             */
            int hash = hashFunc.hashString(dataLine, charset).asInt();
            // add() returns false when this hash was already recorded
            if (writtenHashes.add(hash)) {
                writer.write(tipsLine);
                writer.newLine();
                writer.write(dataLine);
                writer.newLine();
            }
        }
    } catch (IOException e) {
        // Keep the underlying I/O failure as the cause so it is not lost to the caller.
        throw new LoadException("Failed to scan and remove duplicate lines", e);
    }
    if (!dedupFile.renameTo(this.file)) {
        throw new LoadException("Failed to rename dedup file to origin");
    }
}
 
Example 7
Source File: MurmurHash3_32Test.java    From azure-cosmosdb-java with MIT License 5 votes vote down vote up
/**
 * Checks that hashing an empty byte array with seed 0 gives the same value as Guava's
 * Murmur3 32-bit implementation.
 */
@Test(groups = "unit")
public void murmurHash3_32_EmptyByteArray() {
    final byte[] empty = new byte[0];

    // Implementation under test first, then the Guava reference value.
    final int actualHash = murmurHash3_32.hash(empty, empty.length, 0);
    final int expectedHash = Hashing.murmur3_32(0).hashBytes(empty).asInt();

    assertThat(actualHash).isEqualTo(expectedHash);
}
 
Example 8
Source File: HashUtils.java    From MHAP with Apache License 2.0 5 votes vote down vote up
/**
 * Produces a vector of Gaussian samples that is determined by the input string and seed.
 *
 * Four Murmur3 32-bit hashes of {@code str} (seeds {@code seed*4} to {@code seed*4+3}) seed a
 * {@code MersenneTwisterFast}, which supplies {@code n} Gaussian values. The vector is then
 * multiplied by {@code 1/norm} unless its norm is below 1.0e-10, in which case it is returned
 * as generated.
 *
 * @param str  the string that determines the random sequence
 * @param n    number of vector components
 * @param seed base seed for the four hash functions
 * @return the generated vector, scaled by the inverse of its norm when the norm is large enough
 */
public static double[] randomStringGuassianVector(String str, int n, int seed)
{
	// derive four generator seeds from the string
	final int[] generatorSeeds = new int[4];
	for (int k = 0; k < 4; k++)
	{
		generatorSeeds[k] = Hashing.murmur3_32(seed*4+k).newHasher().putUnencodedChars(str).hash().asInt();
	}

	// draw the Gaussian components
	final MersenneTwisterFast rand = new MersenneTwisterFast(generatorSeeds);
	final double[] vec = new double[n];
	int filled = 0;
	while (filled < n)
	{
		vec[filled] = rand.nextGaussian();
		filled++;
	}

	// normalize, skipping vectors whose norm is too small to divide by
	final double norm = BasicMath.norm(vec);
	return (norm<1.0e-10) ? vec : BasicMath.mult(vec, 1.0/norm);
}
 
Example 9
Source File: MurmurHash3_32Test.java    From azure-cosmosdb-java with MIT License 5 votes vote down vote up
/**
 * Checks that every prefix of a Cyrillic string, encoded as UTF-8, hashes to the same value as
 * Guava's Murmur3 32-bit implementation with seed 0.
 */
@Test(groups = "unit")
public void murmurHash3_32_NonLatin() throws UnsupportedEncodingException {
    final String nonLatin = "абвгдеёжзийклмнопрстуфхцчшщъыьэюяабвгдеёжзийклмнопрстуфхцчшщъыьэюяабвгдеёжзийклмнопрстуфхцчшщъыьэюяабвгдеёжзийклмнопрстуфхцчшщъыьэюя";
    final HashFunction googleMurmur3_32 = Hashing.murmur3_32(0);

    // Prefix lengths run from 0 (empty string) up to the whole string.
    for (int prefixLength = 0; prefixLength <= nonLatin.length(); prefixLength++) {
        final byte[] utf8 = nonLatin.substring(0, prefixLength).getBytes("UTF-8");

        final int actualHash = murmurHash3_32.hash(utf8, utf8.length, 0);
        final int expectedHash = googleMurmur3_32.hashBytes(utf8).asInt();

        assertThat(actualHash).isEqualTo(expectedHash);
    }
}
 
Example 10
Source File: HashUtils.java    From MHAP with Apache License 2.0 5 votes vote down vote up
/**
 * Computes {@code numWords} Murmur3 32-bit hashes of a long value.
 *
 * The i-th hash is taken over the value followed by the index i, so each position gets a
 * different hash from the same seeded function.
 *
 * @param obj      the value to hash
 * @param numWords number of hashes to produce
 * @param seed     seed for the Murmur3 function
 * @return an array of {@code numWords} hashes
 */
public final static int[] computeHashesIntLong(long obj, int numWords, int seed)
{
	final int[] words = new int[numWords];
	final HashFunction murmur = Hashing.murmur3_32(seed);

	int index = 0;
	while (index < numWords)
	{
		words[index] = murmur.newHasher().putLong(obj).putInt(index).hash().asInt();
		index++;
	}

	return words;
}
 
Example 11
Source File: HashUtils.java    From MHAP with Apache License 2.0 5 votes vote down vote up
/**
 * Computes {@code numWords} Murmur3 32-bit hashes of a double value.
 *
 * The i-th hash is taken over the value followed by the index i, so each position gets a
 * different hash from the same seeded function.
 *
 * @param obj      the value to hash
 * @param numWords number of hashes to produce
 * @param seed     seed for the Murmur3 function
 * @return an array of {@code numWords} hashes
 */
public final static int[] computeHashesIntDouble(double obj, int numWords, int seed)
{
	final int[] words = new int[numWords];
	final HashFunction murmur = Hashing.murmur3_32(seed);

	int index = 0;
	while (index < numWords)
	{
		words[index] = murmur.newHasher().putDouble(obj).putInt(index).hash().asInt();
		index++;
	}

	return words;
}
 
Example 12
Source File: TestMurmur3.java    From hyperloglog with Apache License 2.0 5 votes vote down vote up
/**
 * Compares Murmur3.hash32 with Guava's murmur3_32 on the 8-byte encodings of 1000
 * pseudo-random doubles, using the same seed for both.
 */
@Test
public void testHashCodesM3_32_double() {
  final int seed = 123;
  final Random rand = new Random(seed);
  final HashFunction guavaMurmur = Hashing.murmur3_32(seed);

  int remaining = 1000;
  while (remaining > 0) {
    final byte[] data = ByteBuffer.allocate(8).putDouble(rand.nextDouble()).array();
    final int guavaHash = guavaMurmur.hashBytes(data).asInt();
    final int ownHash = Murmur3.hash32(data, data.length, seed);
    assertEquals(guavaHash, ownHash);
    remaining--;
  }
}
 
Example 13
Source File: LogicalPlan.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the signature of this LogicalPlan.
 *
 * The plan is printed through a {@code LogicalPlanPrinter} into a stream that feeds a Murmur3
 * 32-bit hash with a constant seed of 0, and the resulting int is returned in decimal form.
 * The signature is meant to identify the plan generated by a Pig script: the same script run
 * with the same version of Pig is intended to give the same signature, even if the input or
 * output locations differ. Being a 32-bit hash, distinct plans can in principle share a value.
 *
 * @return the decimal string form of the plan's 32-bit hash
 * @throws FrontendException if signature can't be computed
 */
public String getSignature() throws FrontendException {

    // Streaming hash: everything the printer writes is folded into the hash.
    final HashOutputStream hashSink = new HashOutputStream(Hashing.murmur3_32(0));
    final PrintStream printTarget = new PrintStream(hashSink);

    new LogicalPlanPrinter(this, printTarget).visit();

    final int signature = hashSink.getHashCode().asInt();
    return String.valueOf(signature);
}
 
Example 14
Source File: PartitionByMurmurHash.java    From Mycat2 with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Fills {@code bucketMap} with the virtual nodes of the consistent-hash ring.
 *
 * Each shard index below {@code count} contributes {@code virtualBucketTimes * getWeight(i)}
 * entries, keyed by the Murmur3 32-bit hash of the node name. {@code weightMap} is cleared
 * once the ring has been built.
 */
private void generateBucketMap(){
	hash=Hashing.murmur3_32(seed);// hash function used for the consistent hashing
	int shardIndex=0;
	while(shardIndex<count){// build the consistent-hash ring, represented as a TreeMap
		final StringBuilder nodeName=new StringBuilder("SHARD-").append(shardIndex);
		final int virtualNodes=virtualBucketTimes*getWeight(shardIndex);
		for(int n=0;n<virtualNodes;n++){
			// The builder is never reset, so each name extends the previous one
			// ("SHARD-0-NODE-0", "SHARD-0-NODE-0-NODE-1", ...). The ring layout depends on this.
			nodeName.append("-NODE-").append(n);
			bucketMap.put(hash.hashUnencodedChars(nodeName).asInt(),shardIndex);
		}
		shardIndex++;
	}
	weightMap=null;
}
 
Example 15
Source File: TestMurmur3.java    From hyperloglog with Apache License 2.0 5 votes vote down vote up
/**
 * Compares Murmur3.hash32 with Guava's murmur3_32 on the 8-byte encodings of 1000
 * pseudo-random longs, using the same seed for both.
 */
@Test
public void testHashCodesM3_32_longs() {
  final int seed = 123;
  final Random rand = new Random(seed);
  final HashFunction guavaMurmur = Hashing.murmur3_32(seed);

  int remaining = 1000;
  while (remaining > 0) {
    final byte[] data = ByteBuffer.allocate(8).putLong(rand.nextLong()).array();
    final int guavaHash = guavaMurmur.hashBytes(data).asInt();
    final int ownHash = Murmur3.hash32(data, data.length, seed);
    assertEquals(guavaHash, ownHash);
    remaining--;
  }
}
 
Example 16
Source File: MiscUtils.java    From iotplatform with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves a hash function from its textual name.
 *
 * @param name one of "murmur3_32", "murmur3_128", "crc32" or "md5"
 * @return the matching hash function from {@code Hashing}
 * @throws IllegalArgumentException if the name is not one of the supported values
 */
public static HashFunction forName(String name) {
    if (name.equals("murmur3_32")) {
        return Hashing.murmur3_32();
    }
    if (name.equals("murmur3_128")) {
        return Hashing.murmur3_128();
    }
    if (name.equals("crc32")) {
        return Hashing.crc32();
    }
    if (name.equals("md5")) {
        return Hashing.md5();
    }
    throw new IllegalArgumentException("Can't find hash function with name " + name);
}
 
Example 17
Source File: CacheUtil.java    From rubix with Apache License 2.0 5 votes vote down vote up
/**
 * Get the local directory path for a given remote path.
 *
 * The remote path is hashed with Murmur3 32-bit and mapped to one of {@code 100 * numDisks}
 * buckets. The bucket then selects a disk directory index, to which the cache data directory
 * suffix is appended.
 *
 * @param remotePath  The remote path location.
 * @param conf        The current Hadoop configuration.
 * @return The local directory path.
 */
private static String getLocalDirFor(String remotePath, Configuration conf)
{
  final int numDisks = getCacheDiskCount(conf);
  final int numBuckets = 100 * numDisks;
  final HashFunction hf = Hashing.murmur3_32();
  final HashCode hc = hf.hashString(remotePath, Charsets.UTF_8);
  // Take the remainder before abs(): Math.abs(Integer.MIN_VALUE) is still negative, so the
  // previous abs-then-modulo order could give a negative bucket. For every other hash value
  // this yields the same bucket as before.
  final int bucket = Math.abs(hc.asInt() % numBuckets);
  final int dirNum = (bucket / numDisks) % numDisks;

  return getDirPath(dirNum, conf) + CacheConfig.getCacheDataDirSuffix(conf);
}
 
Example 18
Source File: TestMurmur3.java    From hyperloglog with Apache License 2.0 5 votes vote down vote up
/**
 * Compares Murmur3.hash32 with Guava's murmur3_32 on the 4-byte encodings of 1000
 * pseudo-random ints, using the same seed for both.
 */
@Test
public void testHashCodesM3_32_ints() {
  final int seed = 123;
  final Random rand = new Random(seed);
  final HashFunction guavaMurmur = Hashing.murmur3_32(seed);

  int remaining = 1000;
  while (remaining > 0) {
    final byte[] data = ByteBuffer.allocate(4).putInt(rand.nextInt()).array();
    final int guavaHash = guavaMurmur.hashBytes(data).asInt();
    final int ownHash = Murmur3.hash32(data, data.length, seed);
    assertEquals(guavaHash, ownHash);
    remaining--;
  }
}
 
Example 19
Source File: ReadDnsQueueActionTest.java    From nomulus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code ReadDnsQueueAction} wired with the test fixtures, advances the clock by one
 * hour and runs the action.
 */
private void run() {
  final ReadDnsQueueAction action = new ReadDnsQueueAction();

  // Collaborators taken from the test fixture.
  action.clock = clock;
  action.dnsQueue = dnsQueue;

  // Collaborators created for this run.
  action.dnsPublishPushQueue = QueueFactory.getQueue(DNS_PUBLISH_PUSH_QUEUE_NAME);
  action.hashFunction = Hashing.murmur3_32();
  action.taskQueueUtils = new TaskQueueUtils(new Retrier(null, 1));

  // Plain configuration values.
  action.tldUpdateBatchSize = TEST_TLD_UPDATE_BATCH_SIZE;
  action.requestedMaximumDuration = Duration.standardSeconds(10);
  action.jitterSeconds = Optional.empty();

  // Advance the time a little, to ensure that leaseTasks() returns all tasks.
  clock.advanceBy(Duration.standardHours(1));

  action.run();
}
 
Example 20
Source File: MurmurByteArrayPartitioner.java    From singer with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a partitioner whose hash function is Guava's Murmur3 32-bit with the default seed.
 */
public MurmurByteArrayPartitioner() {
  this.hashFunction = Hashing.murmur3_32();
}