Python torch.renorm() Examples

The following are 3 code examples of torch.renorm(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.
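Before the project examples, here is a minimal, self-contained sketch of what torch.renorm itself does (the tensor values are made up purely for illustration): every sub-tensor taken along dim whose p-norm exceeds maxnorm is rescaled down to maxnorm, while sub-tensors already within the limit are returned unchanged.

import torch

x = torch.tensor([[3.0, 4.0],    # row L2 norm 5.0 -> will be rescaled to 1.0
                  [0.6, 0.8]])   # row L2 norm 1.0 -> left unchanged
y = torch.renorm(x, p=2, dim=0, maxnorm=1.0)
print(y)
# tensor([[0.6000, 0.8000],
#         [0.6000, 0.8000]])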
Example #1
Source File: lorentz.py    From lorentz-embeddings with MIT License    5 votes
def set_dim0(x):
    x = torch.renorm(x, p=2, dim=0, maxnorm=1e2)  # otherwise leaves will explode
    # NOTE: the paper does not mention the square part of the equation but if
    # you try to derive it you get a square term in the equation
    dim0 = torch.sqrt(1 + (x[:, 1:] ** 2).sum(dim=1))
    x[:, 0] = dim0
    return x


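A minimal usage sketch for set_dim0 (the batch of points below is random and purely illustrative): after the call, every row satisfies x0**2 - ||x[1:]||**2 = 1, i.e. it lies on the unit hyperboloid used by the Lorentz embeddings.

import torch

x = torch.randn(5, 4)      # 5 points; columns 1: hold the spatial part
x = set_dim0(x)            # defined above
print(x[:, 0] ** 2 - (x[:, 1:] ** 2).sum(dim=1))   # ~1.0 for every row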
Example #2
Source File: embedding_atten_v2.py    From Tencent_Ads_Algo_2018 with MIT License    4 votes
def forward(self, input, offsets, ref=None):
        '''
        :param input:   a 1-dim tensor of indices
        :param offsets: a 1-dim tensor of offsets
        :param ref:     a 2-dim tensor of reference features, typically the features of the ads
        :return: a 2-dim tensor (n_sample, n_dim) of pooled, max-norm-constrained embeddings
        '''
        assert (ref is None and not self.atten) or (ref is not None and self.atten)
        # add 1 dim for Embedding
        input = input.view(1,-1)
        # return 1, n_word, n_dim
        embedding = self.embedder(input)
        #print(embedding)
        size = embedding.size()
        # n_word, n_dim
        embedding = embedding.view(size[1],size[2])
        if self.atten:
            size = embedding.size()
            # replicate ref n_word, n_dim
            ref = replicate(ref,offsets,size[0])
            #print(ref)
            # calculate the attention
            #todo
            diff = ref-embedding
            feat_for_atten = torch.cat([embedding,diff,ref],dim=1)
            atten = self.linear1(feat_for_atten)
            atten = self.activation(atten)
            atten = self.linear2(atten)
            # n_word, 1
            atten = self.sigmoid(atten)
            # print(atten)
            embedding = embedding * atten
            #print(embedding)
        # n_sample, n_dim
        res = reduce(embedding,offsets,self.mode)
        # the following lines constrain the max norm of each field's embedding slice
        size = res.size()
        # (n_sample*n_field, n_dim//n_field): one row per field so renorm caps each slice separately
        res = res.view(size[0]*self.n_field,size[1]//self.n_field)
        renorm_res = torch.renorm(res,p=self.norm_type,dim=0,maxnorm=self.max_norm)
        renorm_res = renorm_res.contiguous()
        # res = F.normalize(res,p=self.norm_type,dim=2)*self.max_norm
        res = renorm_res.view(size[0],size[1])
        return res 
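The reshape-then-renorm pattern above caps the norm of each field's slice independently. A standalone sketch of the same trick (the sizes and maxnorm here are made up, not taken from the repository):

import torch

n_sample, n_field, dim_per_field = 4, 2, 3           # illustrative sizes only
res = torch.randn(n_sample, n_field * dim_per_field) * 10

# view each field's slice as its own row, cap its L2 norm, then restore the shape
flat = res.view(n_sample * n_field, dim_per_field)
capped = torch.renorm(flat, p=2, dim=0, maxnorm=1.0).contiguous()
res = capped.view(n_sample, n_field * dim_per_field)

print(res.view(n_sample, n_field, dim_per_field).norm(dim=2))   # every entry <= 1.0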
Example #3
Source File: utils.py    From kbqa-ar-smcnn with Apache License 2.0    4 votes
def load_word2vec_format(filename, word_idx, binary=False, normalize=False,
                         encoding='utf8', unicode_errors='ignore'):
    """
    refer to gensim
    load Word Embeddings
    If you trained the C model using non-utf8 encoding for words, specify that
    encoding in `encoding`.
    :param filename :
    :param word_idx :
    :param binary   : a boolean indicating whether the data is in binary word2vec format.
    :param normalize:
    :param encoding :
    :param unicode_errors: errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.
    """
    vocab = set()
    print("loading word embedding from %s" % filename)
    with open(filename, 'rb') as fin:
#        header = to_unicode(fin.readline(), encoding=encoding)
#        vocab_size, vector_size = map(int, header.split())  # throws for invalid file format
        vocab_size = 1917494
        vector_size = 300
        word_matrix = torch.zeros(len(word_idx), vector_size)

        def add_word(_word, _weights):
            if _word not in word_idx:
                return
            vocab.add(_word)
            word_matrix[word_idx[_word]] = _weights

        if binary:
            binary_len = np.dtype(np.float32).itemsize * vector_size
            for _ in range(vocab_size):
                # mixed text and binary: read text first, then binary
                word = []
                while True:
                    ch = fin.read(1)
                    if ch == b' ':
                        break
                    if ch != b'\n':  # ignore newlines in front of words (some binary files have them)
                        word.append(ch)
                word = to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors)
                weights = torch.from_numpy(np.fromstring(fin.read(binary_len), dtype=REAL))
                add_word(word, weights)
        else:
            for line_no, line in enumerate(fin):
                parts = to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ")
                if len(parts) != vector_size + 1:
                    raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no)
                word, weights = parts[0], list(map(float, parts[1:]))
                weights = torch.Tensor(weights)
                add_word(word, weights)
    if word_idx is not None:
        assert (len(word_idx), vector_size) == word_matrix.size()
    if normalize:
        # cap each row's L2 norm at 1 (rows with smaller norm are left unchanged)
        word_matrix = torch.renorm(word_matrix, 2, 0, 1)
    print("loaded %d words pre-trained from %s with %d" % (len(vocab), filename, vector_size))
    return word_matrix, vector_size, vocab
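One caveat worth noting: with normalize=True the loader uses torch.renorm, which only rescales rows whose L2 norm exceeds 1; rows that are already shorter keep their original norm. If exactly unit-length rows were wanted, F.normalize would do that instead. A small sketch with a stand-in matrix (not part of the original loader):

import torch
import torch.nn.functional as F

word_matrix = torch.randn(3, 300) * 0.05    # stand-in for the loaded embeddings (rows have norm < 1 here)

capped = torch.renorm(word_matrix, p=2, dim=0, maxnorm=1.0)   # renorm: rows already below 1.0 are left as they are
unit = F.normalize(word_matrix, p=2, dim=1)                   # F.normalize: every row forced to norm 1.0

print(capped.norm(dim=1))   # unchanged norms, all below 1.0
print(unit.norm(dim=1))     # all ~1.0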