Python sklearn.utils.murmurhash3_32() Examples

The following are 10 code examples of sklearn.utils.murmurhash3_32(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
Example #1
Source File: utils.py    From OpenQA with MIT License 5 votes vote down vote up
def hash(token, num_buckets):
    """Return the unsigned 32-bit murmurhash of ``token`` modulo ``num_buckets``.

    Used for feature hashing: the hash is requested with ``positive=True``
    and then reduced to a bucket index by the modulo.
    """
    unsigned_hash = murmurhash3_32(token, positive=True)
    return unsigned_hash % num_buckets


# ------------------------------------------------------------------------------
# Text cleaning.
# ------------------------------------------------------------------------------ 
Example #2
Source File: utils.py    From justcopy-backend with MIT License 5 votes vote down vote up
def hash(token, num_buckets):
    """Feature-hash ``token``: unsigned 32-bit murmurhash, modulo ``num_buckets``."""
    token_hash = murmurhash3_32(token, positive=True)
    bucket = token_hash % num_buckets
    return bucket


# ------------------------------------------------------------------------------
# Text cleaning.
# ------------------------------------------------------------------------------ 
Example #3
Source File: additional_feature.py    From combine-FEVER-NSMN with MIT License 5 votes vote down vote up
def get_embedding(self, token, seed=6):
    """Return the hashed embedding row for a short all-digit token.

    If ``self.matrix`` has not been built yet, it is created first via
    ``self.create(seed)``; ``seed`` is not used otherwise.

    Tokens of at most five characters for which ``str.isdigit()`` is true
    are hashed with murmurhash3 (modulo ``self.size``) to select a row of
    ``self.matrix``. Every other token gets a zero vector of length
    ``self.dim``.
    """
    max_length = 5
    if self.matrix is None:
        self.create(seed)

    is_short_number = len(token) <= max_length and token.isdigit()
    if not is_short_number:
        return np.zeros(self.dim)

    hash_index = murmurhash3_32(token, positive=True) % self.size
    return self.matrix[hash_index]
Example #4
Source File: utils.py    From ParlAI with MIT License 5 votes vote down vote up
def hash(token, num_buckets):
    """
    Feature-hash a token into a bucket.

    Computes the unsigned 32 bit murmurhash of ``token`` and reduces it
    modulo ``num_buckets``.
    """
    hashed_token = murmurhash3_32(token, positive=True)
    return hashed_token % num_buckets


# ------------------------------------------------------------------------------
# Text cleaning.
# ------------------------------------------------------------------------------ 
Example #5
Source File: utils.py    From neural_chat with MIT License 5 votes vote down vote up
def hash(token, num_buckets):
    """Return ``token``'s unsigned 32 bit murmurhash reduced modulo ``num_buckets``."""
    raw = murmurhash3_32(token, positive=True)
    return raw % num_buckets


# ------------------------------------------------------------------------------
# Text cleaning.
# ------------------------------------------------------------------------------ 
Example #6
Source File: build_wiki_rindex.py    From semanticRetrievalMRS with MIT License 5 votes vote down vote up
def hash(token, num_buckets=None):
    """Unsigned 32 bit murmurhash of ``token``, optionally bucketed.

    When ``num_buckets`` is ``None`` the hash is returned as is; otherwise
    it is reduced modulo ``num_buckets``.
    """
    hashed = murmurhash3_32(token, positive=True)
    return hashed if num_buckets is None else hashed % num_buckets
Example #7
Source File: search_utils.py    From language with Apache License 2.0 5 votes vote down vote up
def mm3hash(token, num_buckets):
  """Returns the unsigned murmur hash of `token` modulo `num_buckets`."""
  token_hash = murmurhash3_32(token, positive=True)
  return token_hash % num_buckets
Example #8
Source File: layers.py    From spotlight with MIT License 5 votes vote down vote up
def _get_hashed_indices(self, original_indices):
    """Look up the hashed indices for ``original_indices``.

    On first use, a table with one row per original index
    (``self.num_embeddings`` rows) and one column per seed in
    ``self._masks`` is built with murmurhash3 and cached on
    ``self._hashes``. Each entry is reduced modulo
    ``self.compressed_num_embeddings``, and the row at
    ``self.padding_idx`` is forced to zero.

    The cached table is moved to CUDA only at build time, based on whether
    ``original_indices`` is a CUDA tensor on that first call.

    Returns the rows of the table selected by the squeezed
    ``original_indices``.
    """
    if self._hashes is None:
        all_indices = np.arange(self.num_embeddings, dtype=np.int32)

        columns = []
        for seed in self._masks:
            # TODO: integrate with padding index
            column = murmurhash3_32(all_indices, seed=seed)
            # Zero the padding entry before the modulo so it maps to 0.
            column[self.padding_idx] = 0
            columns.append(column % self.compressed_num_embeddings)

        table = np.stack(columns, axis=1).astype(np.int64)
        assert table[self.padding_idx].sum() == 0

        self._hashes = torch.from_numpy(table)

        if original_indices.is_cuda:
            self._hashes = self._hashes.cuda()

    return torch.index_select(self._hashes, 0, original_indices.squeeze())
Example #9
Source File: hashing_tfidf_vectorizer.py    From DeepPavlov with Apache License 2.0 5 votes vote down vote up
def hash_(token: str, hash_size: int) -> int:
    """Hash a token into a value bounded by the given hash size.

    Args:
        token: a word to hash
        hash_size: hash size, used as the modulus

    Returns:
        int, the unsigned murmurhash3 of the token modulo ``hash_size``

    """
    raw_hash = murmurhash3_32(token, positive=True)
    return raw_hash % hash_size
Example #10
Source File: utils.py    From Multi-Step-Reasoning with Apache License 2.0 5 votes vote down vote up
def hash(token, num_buckets):
    """Feature hashing helper.

    Takes the unsigned 32 bit murmurhash of ``token`` and returns it
    modulo ``num_buckets``.
    """
    murmur = murmurhash3_32(token, positive=True)
    return murmur % num_buckets


# ------------------------------------------------------------------------------
# Text cleaning.
# ------------------------------------------------------------------------------