Python torch.log_softmax() Examples

The following are 30 code examples of torch.log_softmax(), drawn from open-source projects. Each example is taken from the project and source file named above it. You may also want to check out all available functions and classes of the torch module.
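For reference, torch.log_softmax(input, dim) computes the same values as torch.log(torch.softmax(input, dim)) but in a numerically stable way, which is why the examples below feed its output directly into KL-divergence, NLL, and CTC criteria. A minimal, self-contained sketch:

import torch

x = torch.randn(2, 5)                      # a batch of 2 rows with 5 logits each
log_probs = torch.log_softmax(x, dim=-1)   # log-probabilities along the last dimension
print(log_probs.exp().sum(dim=-1))         # each row sums to 1, as probabilities should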
Example #1
Source File: metrics.py    From OpenTransformer with MIT License 6 votes
def forward(self, x, target):
        """Compute loss between x and target

        :param torch.Tensor x: prediction (batch, seqlen, class)
        :param torch.Tensor target: target signal masked with self.padding_id (batch, seqlen)
        :return: scalar float value
        :rtype torch.Tensor
        """
        assert x.size(2) == self.size
        batch_size = x.size(0)
        x = x.view(-1, self.size)
        target = target.reshape(-1)
        with torch.no_grad():
            true_dist = x.clone()
            true_dist.fill_(self.smoothing / (self.size - 1))
            ignore = target == self.padding_idx  # (B,)
            total = len(target) - ignore.sum().item()
            target = target.masked_fill(ignore, 0)  # avoid -1 index
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        kl = self.criterion(torch.log_softmax(x, dim=1), true_dist)
        denom = total if self.normalize_length else batch_size
        return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom 
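The method above belongs to a label-smoothing loss module whose constructor is not shown. A minimal sketch of the pieces it relies on (the constructor signature and the KL-divergence criterion are assumptions inferred from the attribute names used in forward):

import torch
import torch.nn as nn

class LabelSmoothingLoss(nn.Module):
    # hypothetical constructor matching the attributes used in forward above
    def __init__(self, size, padding_idx, smoothing=0.1, normalize_length=False):
        super().__init__()
        self.size = size                          # number of classes (vocabulary size)
        self.padding_idx = padding_idx            # target id to ignore
        self.smoothing = smoothing                # mass spread over non-target classes
        self.confidence = 1.0 - smoothing         # mass kept on the target class
        self.normalize_length = normalize_length  # normalize by tokens instead of batch size
        self.criterion = nn.KLDivLoss(reduction="none")  # expects log-probabilities as input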
Example #2
Source File: transformer.py    From dl4mt-seqgen with BSD 3-Clause "New" or "Revised" License 6 votes
def forward(self, x, y, get_scores=False):
        """
        Compute the loss, and optionally the scores.
        """
        assert (y == self.pad_index).sum().item() == 0

        if self.asm is False:
            scores = self.proj(x).view(-1, self.n_words)
            if self.label_smoothing == 0.0:
                loss = F.cross_entropy(scores, y, reduction='mean')  # 'elementwise_mean' was removed in modern PyTorch
            else:
                lprobs = torch.log_softmax(scores, dim=1)
                nll_loss = -lprobs.gather(dim=-1, index=y.unsqueeze(1))
                smooth_loss = -lprobs.sum(dim=-1, keepdim=True)
                nll_loss, smooth_loss = nll_loss.sum(), smooth_loss.sum()
                eps_i = self.label_smoothing / lprobs.size(-1)
                loss = (1. - self.label_smoothing) * nll_loss + eps_i * smooth_loss
                loss = loss / x.shape[0]
        else:
            _, loss = self.proj(x, y)
            scores = self.proj.log_prob(x) if get_scores else None

        return scores, loss 
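For comparison, recent PyTorch (1.10+) exposes label smoothing directly on F.cross_entropy; its normalization differs slightly from the manual branch above, so this is an alternative rather than a drop-in equivalent:

import torch
import torch.nn.functional as F

scores = torch.randn(8, 100)            # (tokens, vocab) logits
y = torch.randint(0, 100, (8,))         # target token ids
loss = F.cross_entropy(scores, y, label_smoothing=0.1)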
Example #3
Source File: test_beam_search.py    From encoder-agnostic-adaptation with MIT License 6 votes
def init_step(self, beam, expected_len_pen):
        # init_preds: [4, 3, 5, 6, 7] - no EOS's
        init_scores = torch.log_softmax(torch.tensor(
            [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1)
        init_scores = deepcopy(init_scores.repeat(
            self.BATCH_SZ * self.BEAM_SZ, 1))
        new_scores = init_scores + beam.topk_log_probs.view(-1).unsqueeze(1)
        expected_beam_scores, expected_preds_0 = new_scores \
            .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS) \
            .topk(self.BEAM_SZ, dim=-1)
        beam.advance(deepcopy(init_scores), self.random_attn())
        self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores))
        self.assertTrue(beam.topk_ids.equal(expected_preds_0))
        self.assertFalse(beam.is_finished.any())
        self.assertFalse(beam.done)
        return expected_beam_scores 
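The bookkeeping these beam-search tests exercise is a single top-k over the beam and vocabulary dimensions flattened together; a toy illustration with made-up sizes (all names are illustrative only):

import torch

BATCH, BEAM, V = 2, 3, 8
scores = torch.randn(BATCH * BEAM, V)        # per-beam log-probabilities
flat = scores.view(BATCH, BEAM * V)          # merge the beam and vocab dimensions
best_scores, best_ids = flat.topk(BEAM, dim=-1)
beam_origin = best_ids // V                  # which beam each survivor came from
token_id = best_ids % V                      # which token it predicts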
Example #4
Source File: label_smoothing_loss.py    From espnet with Apache License 2.0 6 votes
def forward(self, x, target):
        """Compute loss between x and target.

        :param torch.Tensor x: prediction (batch, seqlen, class)
        :param torch.Tensor target:
            target signal masked with self.padding_id (batch, seqlen)
        :return: scalar float value
        :rtype torch.Tensor
        """
        assert x.size(2) == self.size
        batch_size = x.size(0)
        x = x.view(-1, self.size)
        target = target.view(-1)
        with torch.no_grad():
            true_dist = x.clone()
            true_dist.fill_(self.smoothing / (self.size - 1))
            ignore = target == self.padding_idx  # (B,)
            total = len(target) - ignore.sum().item()
            target = target.masked_fill(ignore, 0)  # avoid -1 index
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        kl = self.criterion(torch.log_softmax(x, dim=1), true_dist)
        denom = total if self.normalize_length else batch_size
        return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom 
Example #5
Source File: test_beam_search.py    From OpenNMT-py with MIT License 6 votes
def init_step(self, beam, expected_len_pen):
        # init_preds: [4, 3, 5, 6, 7] - no EOS's
        init_scores = torch.log_softmax(torch.tensor(
            [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1)
        init_scores = deepcopy(init_scores.repeat(
            self.BATCH_SZ * self.BEAM_SZ, 1))
        new_scores = init_scores + beam.topk_log_probs.view(-1).unsqueeze(1)
        expected_beam_scores, expected_preds_0 = new_scores \
            .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS) \
            .topk(self.BEAM_SZ, dim=-1)
        beam.advance(deepcopy(init_scores), self.random_attn())
        self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores))
        self.assertTrue(beam.topk_ids.equal(expected_preds_0))
        self.assertFalse(beam.is_finished.any())
        self.assertFalse(beam.done)
        return expected_beam_scores 
Example #6
Source File: test_softmax.py    From pytorch_scatter with MIT License 6 votes
def test_log_softmax():
    src = torch.tensor([0.2, 0, 0.2, -2.1, 3.2, 7, -1, float('-inf')])
    src.requires_grad_()
    index = torch.tensor([0, 1, 0, 1, 1, 2, 4, 4])

    out = scatter_log_softmax(src, index)

    out0 = torch.log_softmax(torch.tensor([0.2, 0.2]), dim=-1)
    out1 = torch.log_softmax(torch.tensor([0, -2.1, 3.2]), dim=-1)
    out2 = torch.log_softmax(torch.tensor([7], dtype=torch.float), dim=-1)
    out4 = torch.log_softmax(torch.tensor([-1, float('-inf')]), dim=-1)

    expected = torch.stack([
        out0[0], out1[0], out0[1], out1[1], out1[2], out2[0], out4[0], out4[1]
    ], dim=0)

    assert torch.allclose(out, expected)

    out.backward(torch.randn_like(out)) 
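Conceptually, scatter_log_softmax normalizes src within each group of positions that share the same index value; a plain-PyTorch sketch of the same computation (loop form, for clarity only):

import torch

src = torch.tensor([0.2, 0.0, 0.2, -2.1, 3.2])
index = torch.tensor([0, 1, 0, 1, 1])
out = torch.empty_like(src)
for g in index.unique():
    mask = index == g
    out[mask] = torch.log_softmax(src[mask], dim=0)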
Example #7
Source File: ctc.py    From neural_sp with Apache License 2.0 6 votes
def greedy(self, eouts, elens):
        """Greedy decoding.

        Args:
            eouts (FloatTensor): `[B, T, enc_n_units]`
            elens (np.ndarray): `[B]`
        Returns:
            hyps (np.ndarray): Best path hypothesis. `[B, L]`

        """
        log_probs = torch.log_softmax(self.output(eouts), dim=-1)
        best_paths = log_probs.argmax(-1)  # `[B, L]`

        hyps = []
        for b in range(eouts.size(0)):
            indices = [best_paths[b, t].item() for t in range(elens[b])]

            # Step 1. Collapse repeated labels
            collapsed_indices = [x[0] for x in groupby(indices)]

            # Step 2. Remove all blank labels
            best_hyp = [x for x in filter(lambda x: x != self.blank, collapsed_indices)]
            hyps.append(np.array(best_hyp))

        return np.array(hyps) 
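A toy illustration of the two post-processing steps, assuming blank id 0:

from itertools import groupby

best_path = [0, 3, 3, 0, 0, 5, 5, 5, 0, 2]
collapsed = [k for k, _ in groupby(best_path)]   # [0, 3, 0, 5, 0, 2]
hyp = [x for x in collapsed if x != 0]           # [3, 5, 2]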
Example #8
Source File: test_beam_search.py    From OpenNMT-kpg-release with MIT License 6 votes
def init_step(self, beam, expected_len_pen):
        # init_preds: [4, 3, 5, 6, 7] - no EOS's
        init_scores = torch.log_softmax(torch.tensor(
            [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1)
        init_scores = deepcopy(init_scores.repeat(
            self.BATCH_SZ * self.BEAM_SZ, 1))
        new_scores = init_scores + beam.topk_log_probs.view(-1).unsqueeze(1)
        expected_beam_scores, expected_preds_0 = new_scores \
            .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS) \
            .topk(self.BEAM_SZ, dim=-1)
        beam.advance(deepcopy(init_scores), self.random_attn())
        self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores))
        self.assertTrue(beam.topk_ids.equal(expected_preds_0))
        self.assertFalse(beam.is_finished.any())
        self.assertFalse(beam.done)
        return expected_beam_scores 
Example #9
Source File: criterion.py    From neural_sp with Apache License 2.0 6 votes
def focal_loss(logits, ys, ylens, alpha, gamma):
    """Compute focal loss.

    Args:
        logits (FloatTensor): `[B, T, vocab]`
        ys (LongTensor): Indices of labels. `[B, L]`
        ylens (IntTensor): `[B]`
        alpha (float):
        gamma (float):
    Returns:
        loss_mean (FloatTensor): `[1]`

    """
    bs = ys.size(0)

    log_probs = torch.log_softmax(logits, dim=-1)
    probs_inv = -torch.softmax(logits, dim=-1) + 1
    loss = -alpha * torch.mul(torch.pow(probs_inv, gamma), log_probs)
    loss_mean = np.sum([loss[b, :ylens[b], :].sum() for b in range(bs)]) / ylens.sum()
    return loss_mean 
Example #10
Source File: criterion.py    From neural_sp with Apache License 2.0 6 votes
def kldiv_lsm_ctc(logits, ylens):
    """Compute KL divergence loss for label smoothing of CTC and Transducer models.

    Args:
        logits (FloatTensor): `[B, T, vocab]`
        ylens (IntTensor): `[B]`
    Returns:
        loss_mean (FloatTensor): `[1]`

    """
    bs, _, vocab = logits.size()

    log_uniform = logits.new_zeros(logits.size()).fill_(math.log(1 / (vocab - 1)))
    probs = torch.softmax(logits, dim=-1)
    log_probs = torch.log_softmax(logits, dim=-1)
    loss = torch.mul(probs, log_probs - log_uniform)
    loss_mean = np.sum([loss[b, :ylens[b], :].sum() for b in range(bs)]) / ylens.sum()
    # assert loss_mean >= 0
    return loss_mean 
Example #11
Source File: criterion.py    From neural_sp with Apache License 2.0 6 votes
def distillation(logits_student, logits_teacher, ylens, temperature=5.0):
    """Compute cross entropy loss for knowledge distillation of sequence-to-sequence models.

    Args:
        logits_student (FloatTensor): `[B, T, vocab]`
        logits_teacher (FloatTensor): `[B, T, vocab]`
        ylens (IntTensor): `[B]`
        temperature (float):
    Returns:
        loss_mean (FloatTensor): `[1]`

    """
    bs, _, vocab = logits_student.size()

    log_probs_student = torch.log_softmax(logits_student, dim=-1)
    probs_teacher = torch.softmax(logits_teacher / temperature, dim=-1).data
    loss = -torch.mul(probs_teacher, log_probs_student)
    loss_mean = np.sum([loss[b, :ylens[b], :].sum() for b in range(bs)]) / ylens.sum()
    return loss_mean 
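A minimal usage sketch with random tensors (shapes follow the docstring; the values are meaningless). Note that only the teacher logits are temperature-scaled in this implementation:

import torch

logits_student = torch.randn(2, 6, 50)   # (B, T, vocab)
logits_teacher = torch.randn(2, 6, 50)
ylens = torch.tensor([6, 4])
loss = distillation(logits_student, logits_teacher, ylens, temperature=5.0)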
Example #12
Source File: models.py    From sparse-sharing with MIT License 6 votes
def forward(self, task_id, x, y, seq_len):
        words_emb = self.embedding(x)
        char_emb = self.char(x)
        x = torch.cat([words_emb, char_emb], dim=-1)
        x, _ = self.lstm(x, seq_len)
        x = self.dropout(x)  # dropout returns a new tensor; reassign it
        logit = self.out[task_id[0]](x)

        seq_mask = seq_len_to_mask(seq_len, x.size(1))
        if self.crf is not None:
            logit = torch.log_softmax(logit, dim=-1)
            loss = self.crf[task_id[0]](logit, y, seq_mask).mean()
            pred = self.crf[task_id[0]].viterbi_decode(logit, seq_mask)[0]
        else:
            loss = ce_loss(logit, y, seq_mask)
            pred = torch.argmax(logit, dim=2)
        return {"loss": loss, "pred": pred} 
Example #13
Source File: train.py    From crnn.pytorch with Apache License 2.0 6 votes
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args):
    epoch_loss = 0.0
    for image, target, input_len, target_len in tqdm(data_loader):
        image = image.to(device)
        # print(target, target_len, input_len)
        outputs = model(image.to(torch.float32))  # [B,N,C]
        outputs = torch.log_softmax(outputs, dim=2)
        outputs = outputs.permute([1, 0, 2])  # [N,B,C]
        loss = criterion(outputs[:], target, input_len, target_len)
        # gradient update
        model.zero_grad()
        loss.backward()
        optimizer.step()
        # accumulate the loss for the current epoch
        epoch_loss += loss.item() * image.size(0)
        if np.isnan(loss.item()):
            print(target, input_len, target_len)

    epoch_loss = epoch_loss / len(data_loader.dataset)
    # print the log, save the weights
    print('Epoch: {}/{} loss: {:03f}'.format(epoch + 1, args.epochs, epoch_loss))
    return epoch_loss 
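The training loop above assumes criterion is a CTC loss fed with log-probabilities permuted to [T, B, C]; a minimal sketch of that setup (the blank index is an assumption):

import torch.nn as nn

criterion = nn.CTCLoss(blank=0, zero_infinity=True)  # expects log-probs of shape [T, B, C]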
Example #14
Source File: label_smooth.py    From pytorch-loss with MIT License 6 votes
def forward(ctx, logits, label, lb_smooth, lb_ignore):
        # prepare label
        num_classes = logits.size(1)
        lb_pos, lb_neg = 1. - lb_smooth, lb_smooth / num_classes
        label = label.clone().detach()
        ignore = label == lb_ignore
        n_valid = (label != lb_ignore).sum()
        label[ignore] = 0
        lb_one_hot = torch.empty_like(logits).fill_(
            lb_neg).scatter_(1, label.unsqueeze(1), lb_pos).detach()

        ignore = ignore.nonzero()
        _, M = ignore.size()
        a, *b = ignore.chunk(M, dim=1)
        mask = [a, torch.arange(logits.size(1)), *b]
        lb_one_hot[mask] = 0
        coeff = (num_classes - 1) * lb_neg + lb_pos

        ctx.variables = coeff, mask, logits, lb_one_hot

        loss = torch.log_softmax(logits, dim=1).neg_().mul_(lb_one_hot).sum(dim=1)
        return loss 
Example #15
Source File: FFM_Multi_PyTorch.py    From Awesome-RecSystem-Models with MIT License 6 votes
def forward(self, x):
        # first compute the linear part
        linear_part = self.linear(x)

        # compute the pairwise (field-aware) interaction part
        interaction_part = 0.0
        for i in range(self.fea_num):
            for j in range(i + 1, self.fea_num):
                v_ifj = self.v[i, self.field_map_dict[j], :, :]
                v_jfi = self.v[j, self.field_map_dict[i], :, :]

                xij = torch.unsqueeze(x[:, i] * x[:, j], dim=1)
                v_ijji = torch.unsqueeze(torch.sum(v_ifj * v_jfi, dim=0), dim=0)

                interaction_part += torch.mm(xij, v_ijji)

        output = linear_part + interaction_part
        output = torch.log_softmax(output, dim=1)
        return output 
Example #16
Source File: label_smoothing_loss.py    From adviser with GNU General Public License v3.0 6 votes
def forward(self, x, target):
        """Compute loss between x and target.

        :param torch.Tensor x: prediction (batch, seqlen, class)
        :param torch.Tensor target: target signal masked with self.padding_id (batch, seqlen)
        :return: scalar float value
        :rtype torch.Tensor
        """
        assert x.size(2) == self.size
        batch_size = x.size(0)
        x = x.view(-1, self.size)
        target = target.view(-1)
        with torch.no_grad():
            true_dist = x.clone()
            true_dist.fill_(self.smoothing / (self.size - 1))
            ignore = target == self.padding_idx  # (B,)
            total = len(target) - ignore.sum().item()
            target = target.masked_fill(ignore, 0)  # avoid -1 index
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        kl = self.criterion(torch.log_softmax(x, dim=1), true_dist)
        denom = total if self.normalize_length else batch_size
        return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom 
Example #17
Source File: test_beam.py    From OpenNMT-kpg-release with MIT License 5 votes
def first_step(self, beam, expected_beam_scores, expected_len_pen):
        # no EOS's yet
        assert len(beam.finished) == 0
        scores_1 = torch.log_softmax(torch.tensor(
            [[0, 0,  0, .3,   0, .51, .2, 0],
             [0, 0, 1.5,  0,   0,   0,  0, 0],
             [0, 0,  0,  0, .49, .48,  0, 0],
             [0, 0, 0, .2, .2, .2, .2, .2],
             [0, 0, 0, .2, .2, .2, .2, .2]]
        ), dim=1)

        beam.advance(scores_1, torch.randn(self.BEAM_SZ, self.INP_SEQ_LEN))

        new_scores = scores_1 + expected_beam_scores.t()
        expected_beam_scores, unreduced_preds = new_scores.view(-1).topk(
            self.BEAM_SZ, 0, True, True)
        expected_bptr_1 = unreduced_preds // self.N_WORDS  # floor division for integer backpointers
        # [5, 3, 2, 6, 0], so beam 2 predicts EOS!
        expected_preds_1 = unreduced_preds - expected_bptr_1 * self.N_WORDS

        self.assertTrue(beam.scores.allclose(expected_beam_scores))
        self.assertTrue(beam.next_ys[-1].equal(expected_preds_1))
        self.assertTrue(beam.prev_ks[-1].equal(expected_bptr_1))
        self.assertEqual(len(beam.finished), 1)
        self.assertEqual(beam.finished[0][2], 2)  # beam 2 finished
        self.assertEqual(beam.finished[0][1], 2)  # finished on second step
        self.assertEqual(beam.finished[0][0],  # finished with correct score
                         expected_beam_scores[2] / expected_len_pen)
        self.assertFalse(beam.eos_top)
        self.assertFalse(beam.done)
        return expected_beam_scores 
Example #18
Source File: test_beam.py    From OpenNMT-kpg-release with MIT License 5 votes
def init_step(self, beam):
        # init_preds: [4, 3, 5, 6, 7] - no EOS's
        init_scores = torch.log_softmax(torch.tensor(
            [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1)
        expected_beam_scores, expected_preds_0 = init_scores.topk(self.BEAM_SZ)
        beam.advance(init_scores, torch.randn(self.BEAM_SZ, self.INP_SEQ_LEN))
        self.assertTrue(beam.scores.allclose(expected_beam_scores))
        self.assertTrue(beam.next_ys[-1].equal(expected_preds_0[0]))
        self.assertFalse(beam.eos_top)
        self.assertFalse(beam.done)
        return expected_beam_scores 
Example #19
Source File: test_beam_search.py    From OpenNMT-kpg-release with MIT License 5 votes
def third_step(self, beam, expected_beam_scores, expected_len_pen):
        # assumes beam 0 finished on last step
        scores_3 = torch.log_softmax(torch.tensor(
            [[0, 0,  5000, 0,   5000, .51, .2, 0],  # beam 0 shouldn't cont
             [0, 0, 0,  0,   0,   0,  0, 0],
             [0, 0,  0,  0, 0, 5000,  0, 0],
             [0, 0, 0, .2, .2, .2, .2, .2],
             [0, 0, 50, 0, .2, .2, .2, .2]]  # beam 4 -> beam 1 should die
        ), dim=1)
        scores_3 = scores_3.repeat(self.BATCH_SZ, 1)

        beam.advance(deepcopy(scores_3), self.random_attn())

        expected_beam_scores[:, 0::self.BEAM_SZ] = self.DEAD_SCORE
        new_scores = scores_3 + expected_beam_scores.view(-1).unsqueeze(1)
        expected_beam_scores, unreduced_preds = new_scores\
            .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS)\
            .topk(self.BEAM_SZ, -1)
        expected_bptr_3 = unreduced_preds // self.N_WORDS  # floor division for integer backpointers
        # [5, 2, 6, 1, 0] repeat self.BATCH_SZ, so beam 1 predicts EOS!
        expected_preds_3 = unreduced_preds - expected_bptr_3 * self.N_WORDS
        self.assertTrue(beam.topk_log_probs.allclose(
            expected_beam_scores))
        self.assertTrue(beam.topk_scores.allclose(
            expected_beam_scores / expected_len_pen))
        self.assertTrue(beam.topk_ids.equal(expected_preds_3))
        self.assertTrue(beam.current_backptr.equal(expected_bptr_3))
        self.assertEqual(beam.is_finished.sum(), self.BATCH_SZ)
        # new beam 1 finished
        self.assertTrue(beam.is_finished[:, 1].all())
        # new beam 1 is old beam 4
        self.assertTrue(expected_bptr_3[:, 1].eq(4).all())
        beam.update_finished()
        self.assertTrue(beam.top_beam_finished.all())
        self.assertTrue(beam.done)
        return expected_beam_scores 
Example #20
Source File: sign.py    From ogb with MIT License 5 votes
def forward(self, xs):
        outs = []
        for x, lin in zip(xs, self.lins):
            out = F.dropout(F.relu(lin(x)), p=0.5, training=self.training)
            outs.append(out)
        x = torch.cat(outs, dim=-1)
        x = self.lin(x)
        return torch.log_softmax(x, dim=-1) 
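Because forward returns log-probabilities, a model like this is typically trained with the negative log-likelihood loss; a minimal sketch (model, xs, y, and train_idx are illustrative names):

import torch.nn.functional as F

out = model(xs)                          # log-probabilities, shape (N, num_classes)
loss = F.nll_loss(out[train_idx], y[train_idx])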
Example #21
Source File: parts.py    From NeMo with Apache License 2.0 5 votes
def forward(self, hidden_states):
        output_states = hidden_states[:]
        for i in range(self.layers):
            output_states = getattr(self, f'layer{i}')(output_states)

        if self.log_softmax:
            output_states = torch.log_softmax(output_states.float(), dim=-1).to(hidden_states.dtype)
            # TODO: make it work with float16
        return output_states 
Example #22
Source File: decoder.py    From adviser with GNU General Public License v3.0 5 votes
def forward_one_step(self, tgt, tgt_mask, memory, cache=None):
        """Forward one step.

        :param torch.Tensor tgt: input token ids, int64 (batch, maxlen_out)
        :param torch.Tensor tgt_mask: input token mask,  (batch, maxlen_out)
                                      dtype=torch.uint8 before PyTorch 1.2,
                                      dtype=torch.bool from PyTorch 1.2 onward
        :param torch.Tensor memory: encoded memory, float32  (batch, maxlen_in, feat)
        :param List[torch.Tensor] cache: cached output list of (batch, max_time_out-1, size)
        :return y, cache: NN output value and cache per `self.decoders`.
            `y.shape` is (batch, maxlen_out, token)
        :rtype: Tuple[torch.Tensor, List[torch.Tensor]]
        """
        x = self.embed(tgt)
        if cache is None:
            cache = [None] * len(self.decoders)
        new_cache = []
        for c, decoder in zip(cache, self.decoders):
            x, tgt_mask, memory, memory_mask = decoder(x, tgt_mask, memory, None, cache=c)
            new_cache.append(x)

        if self.normalize_before:
            y = self.after_norm(x[:, -1])
        else:
            y = x[:, -1]
        if self.output_layer is not None:
            y = torch.log_softmax(self.output_layer(y), dim=-1)

        return y, new_cache

    # beam search API (see ScorerInterface) 
Example #23
Source File: pretrain_mnist_clf.py    From BiAAE with MIT License 5 votes
def get_logits(self, x):
        return torch.log_softmax(self.forward(x), dim=-1) 
Example #24
Source File: test_beam_search.py    From OpenNMT-py with MIT License 5 votes
def third_step(self, beam, expected_beam_scores, expected_len_pen):
        # assumes beam 0 finished on last step
        scores_3 = torch.log_softmax(torch.tensor(
            [[0, 0, 5000, 0, 5000, .51, .2, 0],  # beam 0 shouldn't cont
             [0, 0, 0, 0, 0, 0, 0, 0],
             [0, 0, 0, 0, 0, 5000, 0, 0],
             [0, 0, 0, .2, .2, .2, .2, .2],
             [0, 0, 50, 0, .2, .2, .2, .2]]  # beam 4 -> beam 1 should die
        ), dim=1)
        scores_3 = scores_3.repeat(self.BATCH_SZ, 1)

        beam.advance(deepcopy(scores_3), self.random_attn())

        expected_beam_scores[:, 0::self.BEAM_SZ] = self.DEAD_SCORE
        new_scores = scores_3 + expected_beam_scores.view(-1).unsqueeze(1)
        expected_beam_scores, unreduced_preds = new_scores\
            .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS)\
            .topk(self.BEAM_SZ, -1)
        expected_bptr_3 = unreduced_preds // self.N_WORDS  # floor division for integer backpointers
        # [5, 2, 6, 1, 0] repeat self.BATCH_SZ, so beam 1 predicts EOS!
        expected_preds_3 = unreduced_preds - expected_bptr_3 * self.N_WORDS
        self.assertTrue(beam.topk_log_probs.allclose(
            expected_beam_scores))
        self.assertTrue(beam.topk_scores.allclose(
            expected_beam_scores / expected_len_pen))
        self.assertTrue(beam.topk_ids.equal(expected_preds_3))
        self.assertTrue(beam.current_backptr.equal(expected_bptr_3))
        self.assertEqual(beam.is_finished.sum(), self.BATCH_SZ)
        # new beam 1 finished
        self.assertTrue(beam.is_finished[:, 1].all())
        # new beam 1 is old beam 4
        self.assertTrue(expected_bptr_3[:, 1].eq(4).all())
        beam.update_finished()
        self.assertTrue(beam.top_beam_finished.all())
        self.assertTrue(beam.done)
        return expected_beam_scores 
Example #25
Source File: test_beam_search.py    From OpenNMT-py with MIT License 5 votes
def first_step(self, beam, expected_beam_scores, expected_len_pen):
        # no EOS's yet
        assert beam.is_finished.sum() == 0
        scores_1 = torch.log_softmax(torch.tensor(
            [[0, 0, 0, .3, 0, .51, .2, 0],
             [0, 0, 1.5, 0, 0, 0, 0, 0],
             [0, 0, 0, 0, .49, .48, 0, 0],
             [0, 0, 0, .2, .2, .2, .2, .2],
             [0, 0, 0, .2, .2, .2, .2, .2]]
        ), dim=1)
        scores_1 = scores_1.repeat(self.BATCH_SZ, 1)

        beam.advance(deepcopy(scores_1), self.random_attn())

        new_scores = scores_1 + expected_beam_scores.view(-1).unsqueeze(1)
        expected_beam_scores, unreduced_preds = new_scores\
            .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS)\
            .topk(self.BEAM_SZ, -1)
        expected_bptr_1 = unreduced_preds // self.N_WORDS  # floor division for integer backpointers
        # [5, 3, 2, 6, 0], so beam 2 predicts EOS!
        expected_preds_1 = unreduced_preds - expected_bptr_1 * self.N_WORDS
        self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores))
        self.assertTrue(beam.topk_scores.allclose(
            expected_beam_scores / expected_len_pen))
        self.assertTrue(beam.topk_ids.equal(expected_preds_1))
        self.assertTrue(beam.current_backptr.equal(expected_bptr_1))
        self.assertEqual(beam.is_finished.sum(), self.BATCH_SZ)
        self.assertTrue(beam.is_finished[:, 2].all())  # beam 2 finished
        beam.update_finished()
        self.assertFalse(beam.top_beam_finished.any())
        self.assertFalse(beam.done)
        return expected_beam_scores 
Example #26
Source File: mtpnet.py    From jsis3d with MIT License 5 votes
def forward(self, x):
        x = self.net(x)
        logits = self.fc1(x)
        logits = logits.transpose(2, 1)
        logits = torch.log_softmax(logits, dim=-1)
        embedded = self.fc2(x)
        embedded = embedded.transpose(2, 1)
        return logits, embedded 
Example #27
Source File: label_smooth.py    From pytorch-loss with MIT License 5 votes
def __init__(self, lb_smooth=0.1, reduction='mean', ignore_index=-100):
        super(LabelSmoothSoftmaxCEV1, self).__init__()
        self.lb_smooth = lb_smooth
        self.reduction = reduction
        self.lb_ignore = ignore_index
        self.log_softmax = nn.LogSoftmax(dim=1) 
Example #28
Source File: test_greedy_search.py    From OpenNMT-py with MIT License 5 votes
def test_doesnt_predict_eos_if_shorter_than_min_len(self):
        # batch 0 will always predict EOS. The other batches will predict
        # non-eos scores.
        for batch_sz in [1, 3]:
            n_words = 100
            _non_eos_idxs = [47]
            valid_score_dist = torch.log_softmax(torch.tensor(
                [6., 5.]), dim=0)
            min_length = 5
            eos_idx = 2
            lengths = torch.randint(0, 30, (batch_sz,))
            samp = GreedySearch(
                0, 1, 2, batch_sz, min_length,
                False, set(), False, 30, 1., 1)
            samp.initialize(torch.zeros(1), lengths)
            all_attns = []
            for i in range(min_length + 4):
                word_probs = torch.full(
                    (batch_sz, n_words), -float('inf'))
                # "best" prediction is eos - that should be blocked
                word_probs[0, eos_idx] = valid_score_dist[0]
                # include at least one prediction OTHER than EOS
                # that is greater than -1e20
                word_probs[0, _non_eos_idxs[0]] = valid_score_dist[1]
                word_probs[1:, _non_eos_idxs[0] + i] = 0

                attns = torch.randn(1, batch_sz, 53)
                all_attns.append(attns)
                samp.advance(word_probs, attns)
                if i < min_length:
                    self.assertTrue(
                        samp.topk_scores[0].allclose(valid_score_dist[1]))
                    self.assertTrue(
                        samp.topk_scores[1:].eq(0).all())
                elif i == min_length:
                    # now batch 0 has ended and no others have
                    self.assertTrue(samp.is_finished[0, :].eq(1).all())
                    self.assertTrue(samp.is_finished[1:, 1:].eq(0).all())
                else:  # i > min_length
                    break 
Example #29
Source File: label_smooth.py    From pytorch-loss with MIT License 5 votes
def forward(self, logits, label):
        '''
        args: logits: tensor of shape (N, C, H, W)
        args: label: tensor of shape(N, H, W)
        '''
        # overcome ignored label
        logits = logits.float() # use fp32 to avoid nan
        with torch.no_grad():
            num_classes = logits.size(1)
            label = label.clone().detach()
            ignore = label == self.lb_ignore
            n_valid = (ignore == 0).sum()
            label[ignore] = 0
            lb_pos, lb_neg = 1. - self.lb_smooth, self.lb_smooth / num_classes
            lb_one_hot = torch.empty_like(logits).fill_(
                lb_neg).scatter_(1, label.unsqueeze(1), lb_pos).detach()

        logs = self.log_softmax(logits)
        loss = -torch.sum(logs * lb_one_hot, dim=1)
        loss[ignore] = 0
        if self.reduction == 'mean':
            loss = loss.sum() / n_valid
        if self.reduction == 'sum':
            loss = loss.sum()

        return loss



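A minimal usage sketch combining this forward with the __init__ from Example #27 (tensor shapes follow the docstring; the class name comes from Example #27):

import torch

criteria = LabelSmoothSoftmaxCEV1(lb_smooth=0.1)
logits = torch.randn(2, 19, 8, 8)           # (N, C, H, W)
label = torch.randint(0, 19, (2, 8, 8))     # (N, H, W)
loss = criteria(logits, label)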
Example #30
Source File: affinity_loss.py    From pytorch-loss with MIT License 5 votes
def forward(self, logits, labels):
        ignore_mask = labels.cpu() == self.ignore_lb
        n_valid = ignore_mask.numel() - ignore_mask.sum().item()
        indices = [
                # center,               # edge
            ((1, None, None, None), (None, -1, None, None)), # up
            ((None, -1, None, None), (1, None, None, None)), # down
            ((None, None, 1, None), (None, None, None, -1)), # left
            ((None, None, None, -1), (None, None, 1, None)), # right
            ((1, None, 1, None), (None, -1, None, -1)), # up-left
            ((1, None, None, -1), (None, -1, 1, None)), # up-right
            ((None, -1, 1, None), (1, None, None, -1)), # down-left
            ((None, -1, None, -1), (1, None, 1, None)), # down-right
        ]

        losses = []
        probs = torch.softmax(logits, dim=1)
        log_probs = torch.log_softmax(logits, dim=1)
        for idx_c, idx_e in indices:
            lbcenter = labels[:, idx_c[0]:idx_c[1], idx_c[2]:idx_c[3]].detach()
            lbedge = labels[:, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]].detach()
            igncenter = ignore_mask[:, idx_c[0]:idx_c[1], idx_c[2]:idx_c[3]].detach()
            ignedge = ignore_mask[:, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]].detach()
            lgp_center = log_probs[:, :, idx_c[0]:idx_c[1], idx_c[2]:idx_c[3]]
            lgp_edge = log_probs[:, :, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]]
            prob_edge = probs[:, :, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]]
            # kldiv below is KL(p_edge || p_center) summed over the class dimension
            kldiv = (prob_edge * (lgp_edge - lgp_center)).sum(dim=1)

            kldiv[ignedge | igncenter] = 0
            loss = torch.where(
                lbcenter == lbedge,
                self.lambda_edge * kldiv,
                self.lambda_not_edge * F.relu(self.kl_margin - kldiv, inplace=True)
            ).sum() / n_valid
            losses.append(loss)

        return sum(losses) / 8