Python torch.nn.utils.clip_grad_norm() Examples
The following are 3
code examples of torch.nn.utils.clip_grad_norm().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
torch.nn.utils
, or try the search function
.
Example #1
Source File: Optim.py From reversible-rnn with MIT License | 6 votes |
def step(self): """Update the model parameters based on current gradients. Optionally, will employ gradient modification or update learning rate. """ self._step += 1 # Decay method used in tensor2tensor. if self.decay_method == "noam": self._set_rate( self.original_lr * (self.model_size ** (-0.5) * min(self._step ** (-0.5), self._step * self.warmup_steps**(-1.5)))) if self.max_grad_norm: total_norm = clip_grad_norm(self.params, self.max_grad_norm) self.optimizer.step()
Example #2
Source File: main.py From tsn-pytorch with BSD 2-Clause "Simplified" License | 4 votes |
def train(train_loader, model, criterion, optimizer, epoch): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() if args.no_partialbn: model.module.partialBN(False) else: model.module.partialBN(True) # switch to train mode model.train() end = time.time() for i, (input, target) in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) target = target.cuda(async=True) input_var = torch.autograd.Variable(input) target_var = torch.autograd.Variable(target) # compute output output = model(input_var) loss = criterion(output, target_var) # measure accuracy and record loss prec1, prec5 = accuracy(output.data, target, topk=(1,5)) losses.update(loss.data[0], input.size(0)) top1.update(prec1[0], input.size(0)) top5.update(prec5[0], input.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() if args.clip_gradient is not None: total_norm = clip_grad_norm(model.parameters(), args.clip_gradient) if total_norm > args.clip_gradient: print("clipping gradient: {} with coef {}".format(total_norm, args.clip_gradient / total_norm)) optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5, lr=optimizer.param_groups[-1]['lr'])))
Example #3
Source File: trainval.py From s3d.pytorch with MIT License | 4 votes |
def train(train_loader, model, criterion, optimizer, epoch): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() # switch to train mode model.train() end = time.time() for i, (input, target,vid) in enumerate(train_loader): # measure data loading time data_time.update(time.time() - end) target = target.cuda(async=True) input_var = torch.autograd.Variable(input) target_var = torch.autograd.Variable(target) # compute output output = model(input_var)[0] loss = criterion(output, target_var) # measure accuracy and record loss prec1, prec5 = accuracy(output.data, target, topk=(1,5)) losses.update(loss.data[0], input.size(0)) top1.update(prec1[0], input.size(0)) top5.update(prec5[0], input.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() if args.clip_gradient is not None: total_norm = clip_grad_norm(model.parameters(), args.clip_gradient) if total_norm > args.clip_gradient: log.l.info("clipping gradient: {} with coef {}".format(total_norm, args.clip_gradient / total_norm)) optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: log.l.info(('Epoch: [{0}][{1}/{2}], lr: {lr:.5f}\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5, lr=optimizer.param_groups[-1]['lr'])))