Python torch.nn.KLDivLoss() Examples
The following are 30 code examples of torch.nn.KLDivLoss(), gathered from open-source projects. You can go to the original project or source file by following the reference above each example. You may also want to check out the other available functions and classes of the torch.nn module.
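Before diving in, here is a minimal, self-contained sketch of the calling convention that every example below relies on. The tensors and shapes are illustrative only: the input to nn.KLDivLoss must contain log-probabilities (typically produced by log_softmax), the target contains plain probabilities, and reduction="batchmean" divides the summed loss by the batch size, which matches the mathematical definition of KL divergence.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Illustrative batch of 4 predictions over 10 classes.
student_logits = torch.randn(4, 10)
reference_logits = torch.randn(4, 10)

log_probs = F.log_softmax(student_logits, dim=1)     # input: log-probabilities
target_probs = F.softmax(reference_logits, dim=1)    # target: probabilities

criterion = nn.KLDivLoss(reduction="batchmean")
loss = criterion(log_probs, target_probs)
print(loss.item())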
Example #1
Source File: seq2slate_tf_trainer.py From ReAgent with BSD 3-Clause "New" or "Revised" License

def __init__(
    self,
    seq2slate_net: Seq2SlateTransformerNet,
    parameters: Seq2SlateParameters,
    minibatch_size: int,
    use_gpu: bool = False,
    policy_optimizer: Optimizer__Union = field(  # noqa: B008
        default_factory=Optimizer__Union.default
    ),
) -> None:
    self.parameters = parameters
    self.use_gpu = use_gpu
    self.seq2slate_net = seq2slate_net
    self.minibatch_size = minibatch_size
    self.minibatch = 0
    self.optimizer = policy_optimizer.make_optimizer(
        self.seq2slate_net.parameters()
    )
    self.kl_div_loss = nn.KLDivLoss(reduction="batchmean")
Example #2
Source File: seq2slate_dr_trainer.py From ReAgent with BSD 3-Clause "New" or "Revised" License

def __init__(
    self,
    seq2slate_net: Seq2SlateTransformerNet,
    parameters: Seq2SlateParameters,
    minibatch_size: int,
    use_gpu: bool = False,
    policy_optimizer: Optimizer__Union = field(  # noqa: B008
        default_factory=Optimizer__Union.default
    ),
) -> None:
    self.parameters = parameters
    self.use_gpu = use_gpu
    self.seq2slate_net = seq2slate_net
    self.minibatch_size = minibatch_size
    self.minibatch = 0
    self.optimizer = policy_optimizer.make_optimizer(
        self.seq2slate_net.parameters()
    )
    # TODO: T62269969 add baseline_net in training
    self.kl_div_loss = nn.KLDivLoss(reduction="none")
Example #3
Source File: seq2slate_attn_trainer.py From ReAgent with BSD 3-Clause "New" or "Revised" License

def __init__(
    self,
    seq2slate_net: Seq2SlateTransformerNet,
    minibatch_size: int = 1024,
    loss_reporter=None,
    use_gpu: bool = False,
    policy_optimizer: Optimizer__Union = field(  # noqa: B008
        default_factory=Optimizer__Union.default
    ),
) -> None:
    self.loss_reporter = loss_reporter
    self.use_gpu = use_gpu
    self.seq2slate_net = seq2slate_net
    self.minibatch_size = minibatch_size
    self.minibatch = 0
    self.optimizer = policy_optimizer.make_optimizer(
        self.seq2slate_net.parameters()
    )
    self.log_softmax = nn.LogSoftmax(dim=1)
    self.kl_loss = nn.KLDivLoss(reduction="batchmean")
    if self.loss_reporter is None:
        self.loss_reporter = NoOpLossReporter()
Example #4
Source File: Loss.py From video-caption-openNMT.pytorch with MIT License

def __init__(self, generator, tgt_vocab,
             normalization="sents", label_smoothing=0.0):
    super(NMTLossCompute, self).__init__(generator, tgt_vocab)
    assert 0.0 <= label_smoothing <= 1.0
    if label_smoothing > 0:
        # When label smoothing is turned on,
        # KL-divergence between q_{smoothed ground truth prob.}(w)
        # and p_{prob. computed by model}(w) is minimized.
        # If the label smoothing value is set to zero, the loss
        # is equivalent to NLLLoss or CrossEntropyLoss.
        # All non-true labels are uniformly set to low-confidence.
        # Note: size_average=False is the legacy spelling of reduction='sum'.
        self.criterion = nn.KLDivLoss(size_average=False)
        one_hot = torch.randn(1, len(tgt_vocab))
        one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
        one_hot[0][self.padding_idx] = 0
        self.register_buffer('one_hot', one_hot)
    else:
        weight = torch.ones(len(tgt_vocab))
        weight[self.padding_idx] = 0
        self.criterion = nn.NLLLoss(weight, size_average=False)
    self.confidence = 1.0 - label_smoothing
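To see how the one_hot buffer built above is typically used, here is a hedged sketch of the matching forward step. The scatter-based construction is the standard pattern for this style of label smoothing, but the variable names (vocab_size, gold, target_dist) and the forward logic are illustrative assumptions, not the project's exact code.

import torch
import torch.nn as nn
import torch.nn.functional as F

vocab_size, padding_idx, label_smoothing = 100, 0, 0.1
confidence = 1.0 - label_smoothing

# Smoothing template, as in the constructor above: uniform low mass on
# every non-true, non-padding label.
one_hot = torch.full((1, vocab_size), label_smoothing / (vocab_size - 2))
one_hot[0][padding_idx] = 0

# Illustrative model output (log-probabilities) and gold target indices.
log_probs = F.log_softmax(torch.randn(5, vocab_size), dim=1)
gold = torch.randint(1, vocab_size, (5,))

# Expand the template per token, then place the confidence mass on the gold label.
target_dist = one_hot.repeat(gold.size(0), 1)
target_dist.scatter_(1, gold.unsqueeze(1), confidence)

criterion = nn.KLDivLoss(size_average=False)  # legacy spelling of reduction='sum'
loss = criterion(log_probs, target_dist)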
Example #5
Source File: Loss.py From DC-NeuralConversation with MIT License

def __init__(self, generator, tgt_vocab,
             normalization="sents", label_smoothing=0.0):
    super(NMTLossCompute, self).__init__(generator, tgt_vocab)
    assert 0.0 <= label_smoothing <= 1.0
    if label_smoothing > 0:
        # When label smoothing is turned on,
        # KL-divergence between q_{smoothed ground truth prob.}(w)
        # and p_{prob. computed by model}(w) is minimized.
        # If label smoothing value is set to zero, the loss
        # is equivalent to NLLLoss or CrossEntropyLoss.
        # All non-true labels are uniformly set to low-confidence.
        self.criterion = nn.KLDivLoss(size_average=False)
        one_hot = torch.randn(1, len(tgt_vocab))
        one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
        one_hot[0][self.padding_idx] = 0
        self.register_buffer('one_hot', one_hot)
    else:
        weight = torch.ones(len(tgt_vocab))
        weight[self.padding_idx] = 0
        self.criterion = nn.NLLLoss(weight, size_average=False)
    self.confidence = 1.0 - label_smoothing
Example #6
Source File: Loss.py From data2text-entity-py with MIT License

def __init__(self, generator, tgt_vocab,
             normalization="sents", label_smoothing=0.0):
    super(NMTLossCompute, self).__init__(generator, tgt_vocab)
    assert 0.0 <= label_smoothing <= 1.0
    if label_smoothing > 0:
        # When label smoothing is turned on,
        # KL-divergence between q_{smoothed ground truth prob.}(w)
        # and p_{prob. computed by model}(w) is minimized.
        # If label smoothing value is set to zero, the loss
        # is equivalent to NLLLoss or CrossEntropyLoss.
        # All non-true labels are uniformly set to low-confidence.
        self.criterion = nn.KLDivLoss(size_average=False)
        one_hot = torch.randn(1, len(tgt_vocab))
        one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
        one_hot[0][self.padding_idx] = 0
        self.register_buffer('one_hot', one_hot)
    else:
        weight = torch.ones(len(tgt_vocab))
        weight[self.padding_idx] = 0
        self.criterion = nn.NLLLoss(weight, size_average=False)
    self.confidence = 1.0 - label_smoothing
Example #7
Source File: label_smoothing_loss.py From espnet with Apache License 2.0

def __init__(
    self,
    size,
    padding_idx,
    smoothing,
    normalize_length=False,
    criterion=nn.KLDivLoss(reduction="none"),
):
    """Construct a LabelSmoothingLoss object."""
    super(LabelSmoothingLoss, self).__init__()
    self.criterion = criterion
    self.padding_idx = padding_idx
    self.confidence = 1.0 - smoothing
    self.smoothing = smoothing
    self.size = size
    self.true_dist = None
    self.normalize_length = normalize_length
Example #8
Source File: Loss.py From reversible-rnn with MIT License

def __init__(self, generator, tgt_vocab, label_smoothing=0.0):
    super(NMTLossCompute, self).__init__(generator, tgt_vocab)
    assert 0.0 <= label_smoothing <= 1.0
    self.tgt_vocab_len = len(tgt_vocab)
    if label_smoothing > 0:
        # When label smoothing is turned on,
        # KL-divergence between q_{smoothed ground truth prob.}(w)
        # and p_{prob. computed by model}(w) is minimized.
        # If label smoothing value is set to zero, the loss
        # is equivalent to NLLLoss or CrossEntropyLoss.
        # All non-true labels are uniformly set to low-confidence.
        self.criterion = nn.KLDivLoss(size_average=False)
        one_hot = torch.randn(1, len(tgt_vocab))
        one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
        one_hot[0][self.padding_idx] = 0
        self.register_buffer('one_hot', one_hot)
    else:
        weight = torch.ones(len(tgt_vocab))
        weight[self.padding_idx] = 0
        self.criterion = nn.NLLLoss(weight, size_average=False)
        # IMPORTANT: NLLLoss is what we use. Interesting that size_average=False
        # ipdb.set_trace()
    self.confidence = 1.0 - label_smoothing
Example #9
Source File: updater.py From born_again_neuralnet with MIT License

def kd_loss(self, outputs, labels, teacher_outputs, alpha=0.2, T=20):
    KD_loss = nn.KLDivLoss()(F.log_softmax(outputs / T, dim=1),
                             F.softmax(teacher_outputs / T, dim=1)) * \
        alpha + F.cross_entropy(outputs, labels) * (1. - alpha)
    return KD_loss
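The same temperature-scaled KL term can be exercised standalone; the tensors below are illustrative, not from the project.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Illustrative student/teacher logits and labels: 8 samples, 10 classes.
outputs = torch.randn(8, 10, requires_grad=True)
teacher_outputs = torch.randn(8, 10)
labels = torch.randint(0, 10, (8,))

alpha, T = 0.2, 20
KD_loss = nn.KLDivLoss()(F.log_softmax(outputs / T, dim=1),
                         F.softmax(teacher_outputs / T, dim=1)) * \
    alpha + F.cross_entropy(outputs, labels) * (1. - alpha)
KD_loss.backward()

Note that some variants (see Examples #14 and #17 below) additionally multiply the KL term by T * T so that its gradient magnitude stays comparable to the cross-entropy term as the temperature grows.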
Example #10
Source File: binDeltaLosses.py From multi-modal-regression with MIT License

def __init__(self, alpha):
    super().__init__()
    self.alpha = alpha
    self.mse = nn.MSELoss().cuda()
    self.kl = nn.KLDivLoss().cuda()
Example #11
Source File: losses.py From self-critical.pytorch with MIT License

def __init__(self, size=0, padding_idx=0, smoothing=0.0):
    super(LabelSmoothing, self).__init__()
    self.criterion = nn.KLDivLoss(size_average=False, reduce=False)
    # self.padding_idx = padding_idx
    self.confidence = 1.0 - smoothing
    self.smoothing = smoothing
    # self.size = size
    self.true_dist = None
Example #12
Source File: latent_clustering_engine.py From tatk with Apache License 2.0

def __init__(self, model, args, verbose=False):
    super(LatentClusteringEngine, self).__init__(model, args, verbose)
    self.crit = nn.CrossEntropyLoss(reduction='sum')
    self.kldiv = nn.KLDivLoss(reduction='sum')
    self.cluster_crit = nn.NLLLoss(reduction='sum')
    self.sel_crit = Criterion(
        self.model.item_dict,
        bad_toks=['<disconnect>', '<disagree>'],
        reduction='mean' if args.sep_sel else 'none')
    self.sel_model = utils.load_model(args.selection_model_file)
    self.sel_model.eval()
Example #13
Source File: losses.py From AlignedReID with MIT License

def __init__(self):
    super(KLMutualLoss, self).__init__()
    self.kl_loss = nn.KLDivLoss(size_average=False)
    self.log_softmax = nn.functional.log_softmax
    self.softmax = nn.functional.softmax
Example #14
Source File: my_loss_function.py From Teacher-free-Knowledge-Distillation with MIT License

def loss_kd_self(outputs, labels, teacher_outputs, params):
    """Loss function for self training: Tf-KD_{self}."""
    alpha = params.alpha
    T = params.temperature
    loss_CE = F.cross_entropy(outputs, labels)
    # The multiplier is 1.0 in most cases; some cases use 10 or 50.
    D_KL = nn.KLDivLoss()(F.log_softmax(outputs / T, dim=1),
                          F.softmax(teacher_outputs / T, dim=1)) \
        * (T * T) * params.multiplier
    KD_loss = (1. - alpha) * loss_CE + alpha * D_KL
    return KD_loss
Example #15
Source File: losses.py From ImageCaptioning.pytorch with MIT License

def __init__(self, size=0, padding_idx=0, smoothing=0.0):
    super(LabelSmoothing, self).__init__()
    self.criterion = nn.KLDivLoss(size_average=False, reduce=False)
    # self.padding_idx = padding_idx
    self.confidence = 1.0 - smoothing
    self.smoothing = smoothing
    # self.size = size
    self.true_dist = None
Example #16
Source File: affinity_loss.py From pytorch-loss with MIT License

def __init__(self, kl_margin, lambda_edge=1.,
             lambda_not_edge=1., ignore_lb=255):
    super(AffinityFieldLoss, self).__init__()
    self.kl_margin = kl_margin
    self.ignore_lb = ignore_lb
    self.lambda_edge = lambda_edge
    self.lambda_not_edge = lambda_not_edge
    self.kldiv = nn.KLDivLoss(reduction='none')
Example #17
Source File: my_loss_function.py From Teacher-free-Knowledge-Distillation with MIT License

def loss_kd(outputs, labels, teacher_outputs, params):
    """Loss function for Knowledge Distillation (KD)."""
    alpha = params.alpha
    T = params.temperature
    loss_CE = F.cross_entropy(outputs, labels)
    D_KL = nn.KLDivLoss()(F.log_softmax(outputs / T, dim=1),
                          F.softmax(teacher_outputs / T, dim=1)) * (T * T)
    KD_loss = (1. - alpha) * loss_CE + alpha * D_KL
    return KD_loss
Example #18
Source File: label_smoothing.py From MTN with MIT License

def __init__(self, size, padding_idx, smoothing=0.0):
    super(LabelSmoothing, self).__init__()
    self.criterion = nn.KLDivLoss(size_average=False)
    self.padding_idx = padding_idx
    self.confidence = 1.0 - smoothing
    self.smoothing = smoothing
    self.size = size
    self.true_dist = None
Example #19
Source File: hd3losses.py From hd3 with BSD 3-Clause "New" or "Revised" License

def __call__(self, ms_prob, ms_pred, gt, corr_range, ds=6):
    B, C, H, W = gt.size()
    lv = len(ms_prob)
    criterion = nn.KLDivLoss(reduction='batchmean').cuda()
    losses = {}
    kld_loss = 0
    for l in range(lv):
        scaled_gt, valid_mask = downsample_flow(gt, 1 / 2**(ds - l))
        if self.task == 'stereo':
            scaled_gt = scaled_gt[:, 0, :, :].unsqueeze(1)
        if l > 0:
            scaled_gt = scaled_gt - F.interpolate(
                ms_pred[l - 1],
                scale_factor=2,
                mode='bilinear',
                align_corners=True)
        scaled_gt = scaled_gt / 2**(ds - l)
        gt_dist = vector2density(scaled_gt, corr_range[l],
                                 self.dim) * valid_mask
        kld_loss += 4**(ds - l) / (H * W) * criterion(
            F.log_softmax(ms_prob[l], dim=1), gt_dist.detach())
    losses['total'] = kld_loss
    for loss_type, loss_value in losses.items():
        losses[loss_type] = loss_value.reshape(1)
    return losses
Example #20
Source File: model.py From mrqa with Apache License 2.0

def forward_qa(self, input_ids, token_type_ids, attention_mask,
               start_positions, end_positions, global_step):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask,
                                   output_all_encoded_layers=False)
    cls_embedding = sequence_output[:, 0]
    if self.concat:
        sep_embedding = self.get_sep_embedding(input_ids, sequence_output)
        hidden = torch.cat([cls_embedding, sep_embedding], dim=1)
    else:
        hidden = sequence_output[:, 0]  # [b, d] : [CLS] representation
    log_prob = self.discriminator(hidden)
    targets = torch.ones_like(log_prob) * (1 / self.num_classes)
    # As with NLLLoss, the input given is expected to contain log-probabilities
    # and is not restricted to a 2D Tensor. The targets are given as probabilities.
    kl_criterion = nn.KLDivLoss(reduction="batchmean")
    if self.anneal:
        self.dis_lambda = self.dis_lambda * kl_coef(global_step)
    kld = self.dis_lambda * kl_criterion(log_prob, targets)

    logits = self.qa_outputs(sequence_output)
    start_logits, end_logits = logits.split(1, dim=-1)
    start_logits = start_logits.squeeze(-1)
    end_logits = end_logits.squeeze(-1)

    # If we are on multi-GPU, split adds a dimension
    if len(start_positions.size()) > 1:
        start_positions = start_positions.squeeze(-1)
    if len(end_positions.size()) > 1:
        end_positions = end_positions.squeeze(-1)
    # Sometimes the start/end positions are outside our model inputs; we ignore these terms
    ignored_index = start_logits.size(1)
    start_positions.clamp_(0, ignored_index)
    end_positions.clamp_(0, ignored_index)

    loss_fct = nn.CrossEntropyLoss(ignore_index=ignored_index)
    start_loss = loss_fct(start_logits, start_positions)
    end_loss = loss_fct(end_logits, end_positions)
    qa_loss = (start_loss + end_loss) / 2
    total_loss = qa_loss + kld
    return total_loss
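The KL-against-a-uniform-target trick used above, which pushes the discriminator's predictions toward maximum entropy, can be reproduced in isolation. A minimal sketch with illustrative shapes:

import torch
import torch.nn as nn
import torch.nn.functional as F

num_classes = 4
# Illustrative discriminator output for a batch of 3 examples.
log_prob = F.log_softmax(torch.randn(3, num_classes), dim=1)

# The KL term reaches its minimum of zero exactly when every class
# receives probability 1 / num_classes.
targets = torch.ones_like(log_prob) * (1 / num_classes)
kld = nn.KLDivLoss(reduction="batchmean")(log_prob, targets)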
Example #21
Source File: trainer.py From nni with MIT License

def __init__(self, temperature):
    super().__init__()
    self.temperature = temperature
    # self.kl_loss = nn.KLDivLoss(reduction='batchmean')
    self.kl_loss = nn.KLDivLoss()
Example #22
Source File: loss.py From joeynmt with Apache License 2.0

def __init__(self, pad_index: int, smoothing: float = 0.0):
    super(XentLoss, self).__init__()
    self.smoothing = smoothing
    self.pad_index = pad_index
    if self.smoothing <= 0.0:
        # standard xent loss
        self.criterion = nn.NLLLoss(ignore_index=self.pad_index,
                                    reduction='sum')
    else:
        # custom label-smoothed loss, computed with KL divergence loss
        self.criterion = nn.KLDivLoss(reduction='sum')
Example #23
Source File: binDeltaLosses.py From multi-modal-regression with MIT License

def __init__(self, alpha, kmeans_file, my_loss):
    super().__init__()
    self.alpha = alpha
    kmeans = pickle.load(open(kmeans_file, 'rb'))
    self.cluster_centers_ = Variable(
        torch.from_numpy(kmeans.cluster_centers_).float()).cuda()
    self.my_loss = my_loss
    self.kl = nn.KLDivLoss().cuda()
Example #24
Source File: binDeltaLosses.py From multi-modal-regression with MIT License

def __init__(self, alpha, gmm_file, my_loss):
    super().__init__()
    self.alpha = alpha
    gmm = pickle.load(open(gmm_file, 'rb'))
    self.cluster_centers = Variable(
        torch.from_numpy(gmm.means_).float()).cuda()
    self.n_clusters = gmm.n_components
    self.my_loss = my_loss
    self.kl = nn.KLDivLoss().cuda()
Example #25
Source File: binDeltaLosses.py From multi-modal-regression with MIT License

def __init__(self, alpha, kmeans_file, my_loss):
    super().__init__()
    self.alpha = alpha
    kmeans = pickle.load(open(kmeans_file, 'rb'))
    self.cluster_centers = Variable(
        torch.from_numpy(
            convert_dictionary(kmeans.cluster_centers_)).float()).cuda()
    self.n_clusters = kmeans.n_clusters
    self.my_loss = my_loss
    self.kl = nn.KLDivLoss().cuda()
Example #26
Source File: binDeltaLosses.py From multi-modal-regression with MIT License

def __init__(self, alpha, kmeans_file, my_loss=None):
    super().__init__()
    self.alpha = alpha
    kmeans = pickle.load(open(kmeans_file, 'rb'))
    self.cluster_centers = Variable(
        torch.from_numpy(kmeans.cluster_centers_).float()).cuda()
    self.n_clusters = kmeans.n_clusters
    if my_loss is None:
        self.mse = nn.MSELoss(reduce=False).cuda()
    else:
        self.mse = my_loss
    self.kl = nn.KLDivLoss().cuda()
Example #27
Source File: __init__.py From BPT with MIT License

def __init__(self, size, smoothing=0.0):
    """Label Smoothing module.

    args:
        size: vocab_size
        smoothing: smoothing ratio
    """
    super(LabelSmoothing, self).__init__()
    self.criterion = nn.KLDivLoss(reduction='sum')
    self.size = size
    self.smoothing = smoothing
Example #28
Source File: utils.py From GoogleConceptualCaptioning with MIT License

def __init__(self, size=0, padding_idx=0, smoothing=0.0):
    super(LabelSmoothing, self).__init__()
    self.criterion = nn.KLDivLoss(size_average=False, reduce=False)
    # self.padding_idx = padding_idx
    self.confidence = 1.0 - smoothing
    self.smoothing = smoothing
    # self.size = size
    self.true_dist = None
Example #29
Source File: ranking_listwise_evaluator.py From ReAgent with BSD 3-Clause "New" or "Revised" License

def __init__(self, seq2slate_net, slate_size: int, calc_cpe: bool) -> None:
    self.seq2slate_net = seq2slate_net
    self.slate_size = slate_size
    self.calc_cpe = calc_cpe
    self.ndcg = []
    self.dcg = []
    self.mean_ap = []
    self.log_softmax = nn.LogSoftmax(dim=1)
    self.kl_loss = nn.KLDivLoss(reduction="batchmean")
Example #30
Source File: loss.py From xfer with Apache License 2.0

def __init__(self, temperature):
    super(TemperatureScaledKLDivLoss, self).__init__()
    self.temperature = temperature
    self.kullback_leibler_divergence = nn.KLDivLoss(reduction="batchmean")
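To round the listing off, here is a hedged sketch of how such a temperature-scaled wrapper might be completed and used. The forward method below is an assumption inferred from the constructor; xfer's actual implementation may differ.

import torch
import torch.nn as nn
import torch.nn.functional as F

class TemperatureScaledKLDivLoss(nn.Module):
    def __init__(self, temperature):
        super(TemperatureScaledKLDivLoss, self).__init__()
        self.temperature = temperature
        self.kullback_leibler_divergence = nn.KLDivLoss(reduction="batchmean")

    def forward(self, student_logits, teacher_logits):
        # Assumed behaviour: soften both distributions with the temperature.
        log_p = F.log_softmax(student_logits / self.temperature, dim=1)
        q = F.softmax(teacher_logits / self.temperature, dim=1)
        return self.kullback_leibler_divergence(log_p, q)

loss_fn = TemperatureScaledKLDivLoss(temperature=4.0)
loss = loss_fn(torch.randn(2, 5), torch.randn(2, 5))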