Example #1
Source File:    From torch-toolbox with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _get_body(self, x, target):
        cos_t = torch.gather(x, 1, target.unsqueeze(1))  # cos(theta_yi)
        if self.easy_margin:
            cond = torch.relu(cos_t)
            cond_v = cos_t - self.threshold
            cond = torch.relu(cond_v)
        cond = cond.bool()
        # Apex would convert FP16 to FP32 here
        # cos(theta_yi + m)
        new_zy = torch.cos(torch.acos(cos_t) + self.m).type(cos_t.dtype)
        if self.easy_margin:
            zy_keep = cos_t
            zy_keep = cos_t -  # (cos(theta_yi) - sin(pi - m)*m)
        new_zy = torch.where(cond, new_zy, zy_keep)
        diff = new_zy - cos_t  # cos(theta_yi + m) - cos(theta_yi)
        gt_one_hot = F.one_hot(target, num_classes=self.classes)
        body = gt_one_hot * diff
        return body 
Example #2
Source File:    From Transformer-TTS with MIT License 6 votes vote down vote up
def forward(self, input_):
        """Apply the two-layer feed-forward sub-block with a residual add.

        ``self.w_1`` and ``self.w_2`` are applied on the input with axes 1 and
        2 swapped; the result is swapped back, added to ``input_`` and passed
        through ``self.layer_norm``. Dropout is intentionally not applied.
        """
        # FFN network, evaluated on the (1, 2)-transposed layout.
        hidden = t.relu(self.w_1(input_.transpose(1, 2)))
        projected = self.w_2(hidden).transpose(1, 2)

        # Residual connection followed by layer normalization.
        return self.layer_norm(projected + input_)
Example #3
Source File:    From pykg2vec with MIT License 6 votes vote down vote up
def inner_forward(self, st_inp, first_dimension_size):
        """Implements the forward pass layers of the algorithm.

        Args:
            st_inp: stacked input fed to ``self.bn0``; per the inline shape
                notes it is presumably ``[b, 1, 2*hidden_size_2, hidden_size_1]``.
            first_dimension_size: size of the leading axis used when
                flattening the convolution output.

        Returns:
            ``sigmoid`` of the scores against every row of
            ``self.ent_embeddings.weight`` plus ``self.b.weight``.
        """
        x = self.bn0(st_inp)  # 2d batch norm over feature dimension.
        x = self.inp_drop(x)  # [b, 1, 2*hidden_size_2, hidden_size_1]
        x = self.conv2d_1(x)  # [b, 32, 2*hidden_size_2-3+1, hidden_size_1-3+1]
        x = self.bn1(x)  # 2d batch normalization across feature dimension
        x = torch.relu(x)
        x = self.feat_drop(x)
        x = x.view(first_dimension_size, -1)  # flatten => [b, 32*(2*hidden_size_2-3+1)*(hidden_size_1-3+1)]
        x = self.fc(x)  # dense layer => [b, k]
        x = self.hidden_drop(x)
        # NOTE(review): this statement was indented one level deeper than its
        # neighbours (an IndentationError); a guarding condition may have been
        # lost. It is applied unconditionally here -- confirm against upstream.
        x = self.bn2(x)  # batch normalization across the last axis
        x = torch.relu(x)
        x = torch.matmul(x, self.transpose(self.ent_embeddings.weight))  # [b, k] * [k, tot_ent] => [b, tot_ent]
        x = torch.add(x, self.b.weight)  # add a bias value
        return torch.sigmoid(x)  # sigmoid activation
Example #4
Source File:    From DocRED with MIT License 6 votes vote down vote up
def forward(self, context_idxs, pos, context_ner, context_char_idxs, context_lens, h_mapping, t_mapping, relation_mask, dis_h_2_t, dis_t_2_h):
        """Score relations between head/tail representations of a document.

        Word, coreference and NER embeddings are concatenated on the last
        axis, encoded by ``self.rnn`` and projected through ``self.linear_re``
        with a ReLU. ``h_mapping`` / ``t_mapping`` are matrix-multiplied with
        the encoded context to obtain head and tail representations, each
        extended with a distance embedding, combined by ``self.bili``, passed
        through ``self.self_att`` with ``relation_mask`` and finally through
        ``self.linear_output``.

        ``context_char_idxs`` is accepted for interface compatibility but is
        not used (the character-CNN path is disabled).
        """
        # The concatenation call had been stripped from these lines; the
        # trailing ``dim=-1)`` shows a last-axis ``torch.cat`` was intended.
        sent = torch.cat(
            [self.word_emb(context_idxs), self.coref_embed(pos), self.ner_emb(context_ner)],
            dim=-1,
        )
        context_output = self.rnn(sent, context_lens)

        context_output = torch.relu(self.linear_re(context_output))

        start_re_output = torch.matmul(h_mapping, context_output)
        end_re_output = torch.matmul(t_mapping, context_output)

        s_rep = torch.cat([start_re_output, self.dis_embed(dis_h_2_t)], dim=-1)
        t_rep = torch.cat([end_re_output, self.dis_embed(dis_t_2_h)], dim=-1)

        re_rep = self.bili(s_rep, t_rep)
        re_rep = self.self_att(re_rep, re_rep, relation_mask)

        return self.linear_output(re_rep)
Example #5
Source File:    From DocRED with MIT License 6 votes vote down vote up
def forward(self, context_idxs, pos, context_ner, context_char_idxs, context_lens, sent_h_mapping, sent_t_mapping, relation_label):
        """Predict a per-sentence score for a given relation label.

        Word, coreference and NER embeddings are concatenated on the last
        axis, encoded by ``self.rnn`` and projected through ``self.linear_t``
        with a ReLU. Head and tail sentence representations (obtained by
        multiplying the mappings with the encoded context) are concatenated
        with the relation embedding, expanded along axis 1 to
        ``sent_h_mapping.size(1)`` entries, and scored by ``self.linear_re``.

        ``context_char_idxs`` is accepted for interface compatibility but is
        not used (the character-CNN path is disabled).

        Returns:
            Output of ``self.linear_re`` with axis 2 squeezed.
        """
        # The concatenation call had been stripped from these lines; the
        # trailing ``dim=-1)`` shows a last-axis ``torch.cat`` was intended.
        sent = torch.cat(
            [self.word_emb(context_idxs), self.coref_embed(pos), self.ner_emb(context_ner)],
            dim=-1,
        )

        el = sent_h_mapping.size(1)
        re_embed = (self.relation_embed(relation_label).unsqueeze(1)).expand(-1, el, -1)

        context_output = self.rnn(sent, context_lens)
        context_output = torch.relu(self.linear_t(context_output))
        start_re_output = torch.matmul(sent_h_mapping, context_output)
        end_re_output = torch.matmul(sent_t_mapping, context_output)

        sent_output = torch.cat([start_re_output, end_re_output, re_embed], dim=-1)
        predict_sent = self.linear_re(sent_output).squeeze(2)

        return predict_sent
Example #6
Source File:    From FlexTensor with MIT License 6 votes vote down vote up
def torch_spmv(M, K, dtype="float32", n_trial=1):
    """Time a sparse-matrix x dense-vector product in PyTorch.

    An ``M x K`` Bernoulli(0.9) sample is converted to a sparse tensor and
    multiplied with a ``K x 1`` dense Bernoulli sample.

    Args:
        M: number of rows of the sparse operand.
        K: shared inner dimension.
        dtype: unused; kept so existing callers keep working.
        n_trial: number of timed repetitions (must be positive, it is the
            divisor of the average).

    Returns:
        Average wall-clock time per multiplication in milliseconds.
    """
    # The right-hand side of this assignment was missing; sparse @ dense is
    # what the call sites below require.
    spmm = torch.sparse.mm
    sampler = torch.distributions.bernoulli.Bernoulli(torch.tensor(0.9))
    a_torch = sampler.sample([M, K]).to_sparse()
    b_torch = sampler.sample([K, 1])

    # warm-up
    spmm(a_torch, b_torch)
    # perf_counter is monotonic and has finer resolution than time.time().
    beg = time.perf_counter()
    for _ in range(n_trial):
        spmm(a_torch, b_torch)
    end = time.perf_counter()
    return (end - beg) * 1e3 / n_trial
Example #7
Source File:    From meshed-memory-transformer with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def forward(self, queries, keys, values, attention_mask=None, attention_weights=None):
        """Run attention with optional key/value caching and a residual path.

        When the module is in stateful mode, incoming ``keys`` / ``values``
        are appended along axis 1 to ``self.running_keys`` /
        ``self.running_values`` and the accumulated tensors are attended over.

        Two residual arrangements are supported:

        * ``identity_map_reordering``: layer-norm the inputs first, attend,
          then add ``dropout(relu(out))`` to the raw queries.
        * otherwise: attend on the raw inputs, apply dropout, then layer-norm
          ``queries + out``.
        """
        if self.can_be_stateful and self._is_stateful:
            # The concatenation call had been stripped from these two
            # assignments; the trailing ``, 1)`` shows an axis-1 concat.
            self.running_keys = torch.cat([self.running_keys, keys], 1)
            keys = self.running_keys

            self.running_values = torch.cat([self.running_values, values], 1)
            values = self.running_values

        if self.identity_map_reordering:
            q_norm = self.layer_norm(queries)
            k_norm = self.layer_norm(keys)
            v_norm = self.layer_norm(values)
            out = self.attention(q_norm, k_norm, v_norm, attention_mask, attention_weights)
            out = queries + self.dropout(torch.relu(out))
        else:
            # Without this ``else`` the branch above was immediately
            # overwritten by the post-norm computation.
            out = self.attention(queries, keys, values, attention_mask, attention_weights)
            out = self.dropout(out)
            out = self.layer_norm(queries + out)
        return out
Example #8
Source File:    From nussl with MIT License 6 votes vote down vote up
def _make_layer(self, i):
        convolution = nn.Conv2d(
            in_channels=self.channels[i - 1] if i > 0 else self.in_channels,
            padding=self.filter_shapes[i] // 2,

        if i == len(self.channels) - 1:
            layer = convolution
            self.add_module(f'layer{i}', layer)
            return layer

        layer = nn.Sequential()
        layer.add_module('conv', convolution)
        if self.batch_norm:
            batch_norm = nn.BatchNorm2d(self.channels[i])
            layer.add_module('batch_norm', batch_norm)
        layer.add_module('relu', nn.ReLU())
        return layer 
Example #9
Source File:    From PyTorch-RL with MIT License 6 votes vote down vote up
def __init__(self, num_inputs, hidden_size=(128, 128), activation='tanh'):
        """Set up the activation, the hidden linear stack and a scalar head.

        Args:
            num_inputs: width of the input features.
            hidden_size: widths of the successive hidden ``nn.Linear`` layers.
            activation: ``'tanh'``, ``'relu'`` or ``'sigmoid'``; any other
                value leaves ``self.activation`` unset.
        """
        # NOTE(review): if the enclosing class derives from nn.Module, its
        # Module.__init__ must run before these assignments -- no such call is
        # visible in this snippet.
        known_activations = (
            ('tanh', torch.tanh),
            ('relu', torch.relu),
            ('sigmoid', torch.sigmoid),
        )
        for label, fn in known_activations:
            if activation == label:
                self.activation = fn
                break

        # Consecutive (fan_in, fan_out) pairs: num_inputs -> h0 -> h1 -> ...
        widths = [num_inputs, *hidden_size]
        self.affine_layers = nn.ModuleList()
        for fan_in, fan_out in zip(widths, widths[1:]):
            self.affine_layers.append(nn.Linear(fan_in, fan_out))

        self.logic = nn.Linear(widths[-1], 1)
Example #10
Source File:    From SlowFast-Network-pytorch with MIT License 6 votes vote down vote up
def lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False):
    # type: (Tensor, float, int, Optional[BroadcastingList2[int]], bool) -> Tensor
    r"""Applies a 2D power-average pooling over an input signal composed of
    several input planes. If the sum of all inputs to the power of `p` is
    zero, the gradient is set to zero as well.

    See :class:`~torch.nn.LPPool2d` for details.

    Args:
        input: input tensor accepted by ``avg_pool2d``.
        norm_type: the exponent ``p`` of the power average.
        kernel_size: pooling window size (expanded to a pair).
        stride: pooling stride; when ``None`` the ``avg_pool2d`` default is
            used.
        ceil_mode: forwarded to ``avg_pool2d``.
    """
    kw, kh = utils._pair(kernel_size)
    if stride is not None:
        stride = torch.jit._unwrap_optional(stride)
        out = avg_pool2d(input.pow(norm_type), kernel_size, stride, 0, ceil_mode)
    else:
        # Without this ``else`` the explicit-stride result was overwritten and
        # ``stride`` was silently ignored.
        out = avg_pool2d(input.pow(norm_type), kernel_size, padding=0, ceil_mode=ceil_mode)

    # Average * window area = sum of powers; then take the p-th root.
    return (torch.sign(out) * relu(torch.abs(out))).mul(kw * kh).pow(1. / norm_type)
Example #11
Source File:    From PyTorch-RL with MIT License 6 votes vote down vote up
def __init__(self, state_dim, action_dim, hidden_size=(128, 128), activation='tanh', log_std=0):
        """Set up a continuous-action policy: hidden stack, mean head, log-std.

        Args:
            state_dim: width of the state features.
            action_dim: number of action dimensions.
            hidden_size: widths of the successive hidden ``nn.Linear`` layers.
            activation: ``'tanh'``, ``'relu'`` or ``'sigmoid'``; any other
                value leaves ``self.activation`` unset.
            log_std: initial value for every entry of ``self.action_log_std``.
        """
        # NOTE(review): if the enclosing class derives from nn.Module, its
        # Module.__init__ must run before these assignments -- no such call is
        # visible in this snippet.
        self.is_disc_action = False
        known_activations = (
            ('tanh', torch.tanh),
            ('relu', torch.relu),
            ('sigmoid', torch.sigmoid),
        )
        for label, fn in known_activations:
            if activation == label:
                self.activation = fn
                break

        # Consecutive (fan_in, fan_out) pairs: state_dim -> h0 -> h1 -> ...
        widths = [state_dim, *hidden_size]
        self.affine_layers = nn.ModuleList()
        for fan_in, fan_out in zip(widths, widths[1:]):
            self.affine_layers.append(nn.Linear(fan_in, fan_out))

        self.action_mean = nn.Linear(widths[-1], action_dim)

        # One learnable log-std per action dimension, shape (1, action_dim).
        initial_log_std = torch.ones(1, action_dim) * log_std
        self.action_log_std = nn.Parameter(initial_log_std)
Example #12
Source File:    From qait_public with MIT License 6 votes vote down vote up
def answer_question(self, matching_representation_sequence, doc_mask):
        """Produce the answer distribution from the matched representations.

        The input sequence is passed through ``self.aggregation_layers``
        aggregator blocks; the raw input and the aggregated output are fed to
        ``self.answer_pointer`` and soft-maxed under ``doc_mask``. When
        ``self.answer_type`` is ``"pointing"`` that distribution is returned;
        otherwise it is used to pool the aggregated sequence, which then goes
        through a two-layer classifier and a final softmax.
        """
        # Outer product of the mask with itself: batch x time x time.
        pairwise_mask = torch.bmm(doc_mask.unsqueeze(-1), doc_mask.unsqueeze(1))
        initial_repr = matching_representation_sequence
        aggregated = initial_repr
        layers_per_block = self.aggregation_conv_num + 2
        for block_idx in range(self.aggregation_layers):
            first_layer_no = block_idx * layers_per_block + 1
            aggregated = self.aggregators[block_idx](
                aggregated, doc_mask, pairwise_mask, first_layer_no, self.aggregation_layers
            )

        pointer_scores = self.answer_pointer(initial_repr, aggregated, doc_mask)  # batch x time
        pointer_dist = masked_softmax(pointer_scores, m=doc_mask, axis=-1)  # batch x time
        if self.answer_type == "pointing":
            return pointer_dist

        # Attention-weighted sum of the aggregated sequence.
        weights = pointer_dist.view(pointer_dist.size(0), 1, pointer_dist.size(1))
        pooled = torch.bmm(weights, aggregated)  # batch x 1 x inp
        pooled = pooled.view(pooled.size(0), -1)  # batch x inp
        hidden = torch.relu(self.question_answerer_output_1(pooled))  # batch x hid
        logits = self.question_answerer_output_2(hidden)  # batch x out
        return masked_softmax(logits, axis=-1)
Example #13
Source File:    From PyTorch-RL with MIT License 6 votes vote down vote up
def __init__(self, state_dim, hidden_size=(128, 128), activation='tanh'):
        """Set up the activation, the hidden linear stack and the value head.

        Args:
            state_dim: width of the state features.
            hidden_size: widths of the successive hidden ``nn.Linear`` layers.
            activation: ``'tanh'``, ``'relu'`` or ``'sigmoid'``; any other
                value leaves ``self.activation`` unset.
        """
        # NOTE(review): if the enclosing class derives from nn.Module, its
        # Module.__init__ must run before these assignments -- no such call is
        # visible in this snippet.
        known_activations = (
            ('tanh', torch.tanh),
            ('relu', torch.relu),
            ('sigmoid', torch.sigmoid),
        )
        for label, fn in known_activations:
            if activation == label:
                self.activation = fn
                break

        # Consecutive (fan_in, fan_out) pairs: state_dim -> h0 -> h1 -> ...
        widths = [state_dim, *hidden_size]
        self.affine_layers = nn.ModuleList()
        for fan_in, fan_out in zip(widths, widths[1:]):
            self.affine_layers.append(nn.Linear(fan_in, fan_out))

        self.value_head = nn.Linear(widths[-1], 1)
Example #14
Source File:    From gnn-comparison with GNU General Public License v3.0 6 votes vote down vote up
def forward(self, data):
        """Run the graph layers, pool per graph and classify.

        Every layer's output (optionally passed through ``self.fc_max`` + ReLU
        when ``self.aggregation == 'max'``) is collected; the collected
        outputs are concatenated along axis 1, max-pooled per graph using
        ``data.batch`` and fed through ``fc1`` (ReLU) and ``fc2``.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x_all = []

        for layer in self.layers:
            x = layer(x, edge_index)
            if self.aggregation == 'max':
                x = torch.relu(self.fc_max(x))
            # ``x_all`` was declared but never filled and the concatenation
            # below had lost its call; both are restored together.
            x_all.append(x)

        x = torch.cat(x_all, dim=1)
        x = global_max_pool(x, batch)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
Example #15
Source File:    From PyTorch-RL with MIT License 6 votes vote down vote up
def __init__(self, state_dim, action_num, hidden_size=(128, 128), activation='tanh'):
        """Set up a discrete-action policy: hidden stack plus action head.

        Args:
            state_dim: width of the state features.
            action_num: number of discrete actions (width of the head).
            hidden_size: widths of the successive hidden ``nn.Linear`` layers.
            activation: ``'tanh'``, ``'relu'`` or ``'sigmoid'``; any other
                value leaves ``self.activation`` unset.
        """
        # NOTE(review): if the enclosing class derives from nn.Module, its
        # Module.__init__ must run before these assignments -- no such call is
        # visible in this snippet.
        self.is_disc_action = True
        known_activations = (
            ('tanh', torch.tanh),
            ('relu', torch.relu),
            ('sigmoid', torch.sigmoid),
        )
        for label, fn in known_activations:
            if activation == label:
                self.activation = fn
                break

        # Consecutive (fan_in, fan_out) pairs: state_dim -> h0 -> h1 -> ...
        widths = [state_dim, *hidden_size]
        self.affine_layers = nn.ModuleList()
        for fan_in, fan_out in zip(widths, widths[1:]):
            self.affine_layers.append(nn.Linear(fan_in, fan_out))

        self.action_head = nn.Linear(widths[-1], action_num)
Example #16
Source File:    From c3dpo_nrsfm with MIT License 6 votes vote down vote up
def bbox_kp_visibility(bbox, keypoints, vis):
    """Flag keypoints inside a padded bounding box and measure overshoot.

    The box ``(bx, by, bw, bh)`` is enlarged by 10% of its width/height on
    every side. A keypoint counts as in-box when it lies inside the enlarged
    box and ``vis == 1``.

    Args:
        bbox: ``(bx, by, bw, bh)``.
        keypoints: indexable with ``keypoints[0]`` = x and ``keypoints[1]`` = y
            coordinates (presumably 1-D tensors of equal length).
        vis: per-keypoint visibility tensor.

    Returns:
        ``(in_box, err)``: the boolean in-box mask, and for each axis the
        largest distance by which a visible keypoint leaves the enlarged box
        (zero when none does).
    """
    bx, by, bw, bh = bbox
    x = keypoints[0]
    y = keypoints[1]
    ctx_ = 0.1
    in_box = (x >= bx-ctx_*bw) * (x <= bx+bw*(1+ctx_)) * \
        (y >= by-ctx_*bh) * (y <= by+bh*(1+ctx_))

    in_box = in_box * (vis == 1)

    # NOTE(review): only the first entry of this list was present; the other
    # three are rebuilt as the violations of the remaining in-box bounds
    # above (rows 0/1 -> x, rows 2/3 -> y, matching the indexing below).
    err = torch.stack([(bx-ctx_*bw)-x,
                       x-(bx+bw*(1+ctx_)),
                       (by-ctx_*bh)-y,
                       y-(by+bh*(1+ctx_))])
    # Keep only positive violations of visible keypoints.
    err = torch.relu(err) * vis[None].float()
    err = torch.stack((torch.max(err[0], err[1]),
                       torch.max(err[2], err[3]))).max(dim=1)[0]

    return in_box, err
Example #17
Source File:    From mackrl with Apache License 2.0 5 votes vote down vote up
def forward(self, inputs, tformat):
        """Compute values for the ``"main"`` input via ``fc1`` -> ReLU -> ``fc2``.

        The input is flattened with ``_to_batch``, processed, and restored
        with ``_from_batch``.

        Returns:
            ``(values, tformat)`` where ``tformat`` is the format reported by
            ``_to_batch``.
        """
        # _check_inputs_validity(inputs, self.input_shapes, tformat, allow_nonseq=True)

        flat_main, batch_params, batch_tformat = _to_batch(inputs.get("main"), tformat)
        hidden = F.relu(self.fc1(flat_main))
        values = self.fc2(hidden)
        return _from_batch(values, batch_params, batch_tformat), batch_tformat
Example #18
Source File:    From NLP_Toolkit with Apache License 2.0 5 votes vote down vote up
def forward(self, x):
        """Apply conv1 -> bn1 -> ReLU -> drop1 -> conv2 -> bn2 -> ReLU."""
        first_stage = torch.relu(self.bn1(self.conv1(x)))
        first_stage = self.drop1(first_stage)
        return torch.relu(self.bn2(self.conv2(first_stage)))
Example #19
Source File:    From qait_public with MIT License 5 votes vote down vote up
def forward(self, M1, M2, mask):
        """Score every position from two representation streams.

        ``M1`` and ``M2`` are concatenated on the last axis and passed through
        two ReLU projections. The second ("advantage") stream is mean-centred
        over its last axis and added to the first. Masked positions are zeroed
        before and after combining, and again on the final scores.

        Args:
            M1, M2: tensors sharing all but (possibly) the last axis.
            mask: per-position mask; it is multiplied with the squeezed scores,
                so it is assumed to be ``batch x time``.

        Returns:
            Masked scores with the trailing singleton axis of ``self.w_2``'s
            output removed.
        """
        # The concatenation call had been stripped from this line.
        X_concat = torch.cat([M1, M2], dim=-1)
        X = torch.relu(self.w_1(X_concat))
        X_advantage = torch.relu(self.w_1_advantage(X_concat))
        X = X * mask.unsqueeze(-1)
        X = X + X_advantage - X_advantage.mean(-1, keepdim=True)  # combine streams
        X = X * mask.unsqueeze(-1)
        # Squeeze only the trailing axis: a bare ``squeeze()`` also removes a
        # batch or time axis of size 1 and breaks the product with ``mask``.
        # Assumes self.w_2 emits a trailing axis of size 1 -- TODO confirm.
        Y = self.w_2(X).squeeze(-1)
        Y = Y * mask
        return Y
Example #20
Source File:    From qait_public with MIT License 5 votes vote down vote up
def forward(self, x):
        """Depthwise then pointwise convolution with ReLU.

        Axes 1 and 2 are swapped before the convolutions and swapped back
        afterwards.
        """
        swapped = x.transpose(1, 2)
        convolved = self.pointwise_conv(self.depthwise_conv(swapped))
        return torch.relu(convolved).transpose(1, 2)
Example #21
Source File:    From qait_public with MIT License 5 votes vote down vote up
def forward(self, x, mask, self_att_mask, l, blks):
        """Run one encoder block: conv stack, self-attention, feed-forward.

        Each sub-layer follows the same pattern: remember the input as
        residual, normalise, (optionally) apply dropout, run the sub-layer,
        zero masked positions, and merge with the residual through
        ``self.layer_dropout`` using a rate of
        ``self.dropout * l / total_layers``, after which ``l`` is advanced.

        Args:
            x: input sequence passed to ``PosEncoder``.
            mask: per-position mask, unsqueezed on the last axis before use.
            self_att_mask: mask handed to ``self.self_att``.
            l: index of the first sub-layer of this block.
            blks: number of blocks; with ``self.conv_num + 2`` sub-layers each
                it yields ``total_layers``.
        """
        total_layers = (self.conv_num + 2) * blks
        # conv layers
        out = PosEncoder(x)
        for i, conv in enumerate(self.convs):
            res = out
            out = self.norm_C[i](out)
            if i % 2 == 0:
                # NOTE(review): the tail of every F.dropout call in this
                # method was missing. ``training=self.training`` is restored
                # so dropout is disabled in eval mode; this assumes ``self``
                # is an nn.Module -- confirm against upstream.
                out = F.dropout(out, p=self.dropout, training=self.training)
            out = conv(out)
            out = out * mask.unsqueeze(-1)
            out = self.layer_dropout(out, res, self.dropout * float(l) / total_layers)
            l += 1
        res = out
        out = self.norm_1(out)
        out = F.dropout(out, p=self.dropout, training=self.training)
        # self attention
        out = self.self_att(out, self_att_mask, out, out)
        out = out * mask.unsqueeze(-1)
        out = self.layer_dropout(out, res, self.dropout * float(l) / total_layers)
        l += 1
        res = out
        out = self.norm_2(out)
        out = F.dropout(out, p=self.dropout, training=self.training)
        # fully connected layers
        out = self.FFN_1(out)
        out = torch.relu(out)
        out = self.FFN_2(out)
        out = out * mask.unsqueeze(-1)
        out = self.layer_dropout(out, res, self.dropout * float(l) / total_layers)
        l += 1
        return out
Example #22
Source File:    From NLP_Toolkit with Apache License 2.0 5 votes vote down vote up
def forward(self, x):
        """Apply fc1 -> ReLU -> dropout -> fc2."""
        hidden = self.dropout(torch.relu(self.fc1(x)))
        return self.fc2(hidden)
Example #23
Source File:    From mackrl with Apache License 2.0 5 votes vote down vote up
def forward(self, inputs, n_agents, tformat, loss_fn=None, hidden_states=None, **kwargs):
        """Compute a normalised policy for the ``"main"`` input.

        The input goes through ``fc1`` -> ReLU -> ``fc2`` and a hand-written
        softmax over axis 1 that guards against (near-)zero sums.

        Args:
            inputs: mapping with at least ``"avail_actions"`` and ``"main"``.
            n_agents: unused in the active code path.
            tformat: tensor-format descriptor understood by ``_to_batch``.
            loss_fn: optional callable ``loss_fn(x, tformat=...)`` whose first
                return value is reported as ``losses``.
            hidden_states: passed through unchanged.
            **kwargs: must contain ``"test_mode"``.

        Returns:
            ``(x, hidden_states, losses, tformat)``; ``losses`` is ``None``
            when no ``loss_fn`` is given.
        """
        # Looked up eagerly so a missing key still raises KeyError; the value
        # is only needed by the disabled exploration code below.
        test_mode = kwargs["test_mode"]

        # Batched for parity with the original; currently not used further.
        avail_actions, params_aa, tformat_aa = _to_batch(inputs["avail_actions"], tformat)
        x, params, tformat = _to_batch(inputs["main"], tformat)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # Manual softmax over axis 1; rows whose exponentials sum to
        # (almost) zero get their divisor replaced by 1 to avoid dividing by 0.
        x = th.exp(x)
        x_sum = x.sum(dim=1, keepdim=True)
        second_mask = (x_sum <= np.sqrt(float(np.finfo(np.float32).tiny)) * x.shape[1])
        x_sum = x_sum.masked_fill(second_mask, 1.0)
        x = th.div(x, x_sum)

        # throw debug warning if second masking was necessary
        if th.sum(second_mask) > 0 and self.args.debug_verbose:
            print('Warning in MACKRLNonRecurrentAgentLevel1.forward(): some sum during the softmax has been 0!')

        # add softmax exploration (if switched on)
        # if self.args.mackrl_exploration_mode_level1 in ["softmax"] and not test_mode:
        #     epsilons = inputs["epsilons_central_level1"].unsqueeze(_tdim(tformat)).unsqueeze(0)
        #     epsilons, _, _ = _to_batch(epsilons, tformat)
        #     x = epsilons / _n_agent_pairings(n_agents) + x * (1 - epsilons)

        x = _from_batch(x, params, tformat)

        # ``losses`` used to be unbound (UnboundLocalError at the return)
        # whenever no loss function was supplied.
        losses = None
        if loss_fn is not None:
            losses, _ = loss_fn(x, tformat=tformat)

        return x, hidden_states, losses, tformat
Example #24
Source File:    From mackrl with Apache License 2.0 5 votes vote down vote up
def forward(self, inputs, tformat):
        """Apply ``fc1`` + ReLU to the ``"main"`` input.

        The input is flattened with ``_to_batch`` and restored with
        ``_from_batch``; the format reported by ``_to_batch`` is returned
        alongside the result.
        """
        flat_main, n_seq, batch_tformat = _to_batch(inputs["main"], tformat)
        activated = F.relu(self.fc1(flat_main))
        return _from_batch(activated, n_seq, batch_tformat), batch_tformat
Example #25
Source File:    From FlexTensor with MIT License 5 votes vote down vote up
def forward(self, left, right):
        """Merge two branch outputs and map them through a small MLP.

        ``left`` and ``right`` are processed by ``self.left`` / ``self.right``,
        concatenated along axis 1, passed through ReLU, ``self.l1`` + ReLU and
        finally ``self.l2``.
        """
        left_res = self.left(left)
        right_res = self.right(right)
        # The concatenation call had been stripped from this line.
        tmp = torch.cat([left_res, right_res], dim=1)
        tmp = torch.relu(tmp)
        tmp = torch.relu(self.l1(tmp))
        tmp = self.l2(tmp)
        return tmp
Example #26
Source File:    From diora with Apache License 2.0 5 votes vote down vote up
def forward(self, hs, cs, constant=1.0):
        """Compose hidden states with a two-layer ReLU MLP.

        Args:
            hs: sequence of tensors joined along axis 1 before the MLP.
            cs: unused; kept for interface compatibility.
            constant: unused; kept for interface compatibility.

        Returns:
            ``(h, c)`` where ``h`` is the MLP output and ``c`` is a float32
            zero tensor of the same shape.
        """
        # NOTE(review): the right-hand side was reduced to ``, 1)``; an axis-1
        # concatenation of ``hs`` is the reading consistent with the
        # remaining fragment -- confirm against upstream.
        input_h = torch.cat(hs, 1)
        h = torch.relu(torch.matmul(input_h, self.W_0) + self.B)
        h = torch.relu(torch.matmul(h, self.W_1) + self.B_1)

        device = torch.cuda.current_device() if self.is_cuda else None
        c = torch.full(h.shape, 0, dtype=torch.float32, device=device)

        return h, c

# Score Functions 
Example #27
Source File:    From DocRED with MIT License 5 votes vote down vote up
def forward(self, context_idxs, pos, context_ner, context_char_idxs, context_lens, h_mapping, t_mapping,
            relation_mask, dis_h_2_t, dis_t_2_h):
        """Predict relations with optional coreference/type/distance features.

        Word embeddings are optionally extended (last axis) with entity
        embeddings of ``pos`` (``self.use_coreference``) and NER embeddings
        (``self.use_entity_type``), encoded by ``self.rnn`` and projected
        through ``self.linear_re`` + ReLU. Head and tail representations come
        from multiplying ``h_mapping`` / ``t_mapping`` with the encoded
        context. With ``self.use_distance`` they are extended with distance
        embeddings before ``self.bili``; otherwise ``self.bili`` is applied to
        them directly.

        ``context_char_idxs`` and ``relation_mask`` are accepted for interface
        compatibility but are not used here.
        """
        sent = self.word_emb(context_idxs)
        # The concatenation calls in this method had been stripped; the
        # trailing ``dim=-1)`` shows last-axis ``torch.cat`` calls.
        if self.use_coreference:
            sent = torch.cat([sent, self.entity_embed(pos)], dim=-1)

        if self.use_entity_type:
            sent = torch.cat([sent, self.ner_emb(context_ner)], dim=-1)

        context_output = self.rnn(sent, context_lens)

        context_output = torch.relu(self.linear_re(context_output))

        start_re_output = torch.matmul(h_mapping, context_output)
        end_re_output = torch.matmul(t_mapping, context_output)

        if self.use_distance:
            s_rep = torch.cat([start_re_output, self.dis_embed(dis_h_2_t)], dim=-1)
            t_rep = torch.cat([end_re_output, self.dis_embed(dis_t_2_h)], dim=-1)
            predict_re = self.bili(s_rep, t_rep)
        else:
            # Without this ``else`` the distance-aware prediction was
            # overwritten (and the no-distance case was never computed).
            predict_re = self.bili(start_re_output, end_re_output)

        return predict_re
Example #28
Source File:    From DocRED with MIT License 5 votes vote down vote up
def forward(self, context_idxs, pos, context_ner, context_char_idxs, context_lens, h_mapping, t_mapping,
            relation_mask, dis_h_2_t, dis_t_2_h):
        """Predict relations from distance-augmented head/tail representations.

        Word, coreference and NER embeddings are concatenated on the last
        axis, encoded by ``self.rnn`` and projected through ``self.linear_re``
        + ReLU. Head and tail representations (mappings multiplied with the
        encoded context) are each extended with a distance embedding and
        combined by ``self.bili``.

        ``context_char_idxs`` and ``relation_mask`` are accepted for interface
        compatibility but are not used here.
        """
        # The concatenation calls in this method had been stripped; the
        # trailing ``dim=-1)`` shows last-axis ``torch.cat`` calls.
        sent = torch.cat(
            [self.word_emb(context_idxs), self.coref_embed(pos), self.ner_emb(context_ner)],
            dim=-1,
        )

        context_output = self.rnn(sent, context_lens)

        context_output = torch.relu(self.linear_re(context_output))

        start_re_output = torch.matmul(h_mapping, context_output)
        end_re_output = torch.matmul(t_mapping, context_output)

        s_rep = torch.cat([start_re_output, self.dis_embed(dis_h_2_t)], dim=-1)
        t_rep = torch.cat([end_re_output, self.dis_embed(dis_t_2_h)], dim=-1)
        predict_re = self.bili(s_rep, t_rep)

        return predict_re
Example #29
Source File:    From FlexTensor with MIT License 5 votes vote down vote up
def parted_linear(x, left, right):
    """Clip ``x`` to the interval spanned by ``left`` and ``right`` via ReLUs.

    The bounds may be given in either order.
    """
    import torch

    lower, upper = (right, left) if left > right else (left, right)
    capped = upper - torch.relu(upper - x)  # min(x, upper)
    return torch.relu(capped - lower) + lower  # max(capped, lower)
Example #30
Source File:    From FlexTensor with MIT License 5 votes vote down vote up
def __init__(self, input_len):
        """Create six linear layers with ReLU activations.

        Layer widths are ``input_len -> 32 -> 64 -> 128 -> 64 -> 16 -> 1``.
        Layers 1-4 each get an ``nn.Dropout(p=0.01)``; layers 5 and 6 have
        none. Attributes are named ``linear{k}``, ``dropout{k}`` and
        ``activate{k}``.
        """
        super(PerformanceModel, self).__init__()
        self.input_len = input_len

        # (in_features, out_features, has_dropout) for linear1 .. linear6
        layer_specs = (
            (self.input_len, 32, True),
            (32, 64, True),
            (64, 128, True),
            (128, 64, True),
            (64, 16, False),
            (16, 1, False),
        )
        for idx, (fan_in, fan_out, with_dropout) in enumerate(layer_specs, start=1):
            setattr(self, "linear{}".format(idx), nn.Linear(fan_in, fan_out, bias=True))
            if with_dropout:
                setattr(self, "dropout{}".format(idx), nn.Dropout(p=0.01))
            setattr(self, "activate{}".format(idx), torch.relu)