Python torch.optim.SGD Examples
The following are 26 code examples of torch.optim.SGD(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.optim, or try the search function.
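Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: construct the optimizer over model.parameters(), then run the zero_grad / backward / step cycle. The model and data below are placeholders, not taken from any of the listed projects.

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 1)                       # placeholder model
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = nn.MSELoss()

inputs, targets = torch.randn(8, 10), torch.randn(8, 1)  # placeholder batch
optimizer.zero_grad()          # clear gradients from the previous step
loss = criterion(model(inputs), targets)
loss.backward()                # accumulate gradients
optimizer.step()               # apply the SGD update

Every example below is a variation on this loop: what changes is how the parameters are grouped and how the hyper-parameters are supplied.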
Example #1
Source File: train.py From pytorch-multigpu with MIT License | 7 votes

def main():
    best_acc = 0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010))])

    dataset_train = CIFAR10(root='../data', train=True, download=True,
                            transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_worker)

    # the 10 CIFAR-10 classes
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    print('==> Making model..')
    net = pyramidnet()
    net = nn.DataParallel(net)
    net = net.to(device)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # optimizer = optim.SGD(net.parameters(), lr=args.lr,
    #                       momentum=0.9, weight_decay=1e-4)

    train(net, criterion, optimizer, train_loader, device)
Example #2
Source File: optimizer.py From ACAN with MIT License | 7 votes

def create_optimizer(args, optim_params):
    if args.optimizer == 'sgd':
        return optim.SGD(optim_params, args.lr, momentum=args.momentum,
                         weight_decay=args.weight_decay)
    elif args.optimizer == 'adagrad':
        return optim.Adagrad(optim_params, args.lr,
                             weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return optim.Adam(optim_params, args.lr,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optimizer == 'amsgrad':
        return optim.Adam(optim_params, args.lr,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, amsgrad=True)
    elif args.optimizer == 'adabound':
        from adabound import AdaBound
        return AdaBound(optim_params, args.lr,
                        betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay)
    else:
        assert args.optimizer == 'amsbound'
        from adabound import AdaBound
        return AdaBound(optim_params, args.lr,
                        betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay, amsbound=True)
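A factory like create_optimizer is typically driven by parsed command-line flags. A hypothetical invocation might look like the following; the Namespace values are illustrative, not taken from the ACAN project, and `model` is assumed to exist in scope:

from argparse import Namespace

# Illustrative flag values; the real project parses these with argparse.
args = Namespace(optimizer='sgd', lr=0.01, momentum=0.9, weight_decay=5e-4,
                 beta1=0.9, beta2=0.999)
optimizer = create_optimizer(args, model.parameters())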
Example #3
Source File: utility.py From OISR-PyTorch with BSD 2-Clause "Simplified" License | 6 votes

def make_optimizer(args, my_model):
    trainable = filter(lambda x: x.requires_grad, my_model.parameters())

    if args.optimizer == 'SGD':
        optimizer_function = optim.SGD
        kwargs = {'momentum': args.momentum}
    elif args.optimizer == 'ADAM':
        optimizer_function = optim.Adam
        kwargs = {
            'betas': args.betas,
            'eps': args.epsilon
        }
    elif args.optimizer == 'RMSprop':
        optimizer_function = optim.RMSprop
        kwargs = {'eps': args.epsilon}

    kwargs['lr'] = args.lr
    kwargs['weight_decay'] = args.weight_decay

    return optimizer_function(trainable, **kwargs)
Example #4
Source File: utils.py From Attention-Gated-Networks with MIT License | 6 votes

def get_optimizer(option, params):
    opt_alg = 'sgd' if not hasattr(option, 'optim') else option.optim
    if opt_alg == 'sgd':
        optimizer = optim.SGD(params,
                              lr=option.lr_rate,
                              momentum=0.9,
                              nesterov=True,
                              weight_decay=option.l2_reg_weight)
    elif opt_alg == 'adam':
        optimizer = optim.Adam(params,
                               lr=option.lr_rate,
                               betas=(0.9, 0.999),
                               weight_decay=option.l2_reg_weight)
    return optimizer
Example #5
Source File: tutorial.py From TaskBot with GNU General Public License v3.0 | 6 votes

def trainIters(encoder, decoder, epochs, dataset, init_epochs, learning_rate=0.01):
    plot_losses = []
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()
    for epoch in range(init_epochs, epochs + init_epochs):
        for i, (input_tensor, target_tensor) in enumerate(dataset.gen()):
            loss = train(input_tensor, target_tensor, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion)
            if loss:
                plot_losses.append(loss)
                if i % 1000 == 0:
                    print("epoch {}, step: {}, loss: {}".format(epoch, i, loss))
            else:
                print(input_tensor, target_tensor)
        print("save model")
        torch.save(encoder.state_dict(),
                   "epoch_{}_step_{}_encoder_loss_{}.pkl".format(epoch, i, loss))
        torch.save(decoder.state_dict(),
                   "epoch_{}_step_{}_decoder_loss_{}.pkl".format(epoch, i, loss))
Example #6
Source File: optimizer.py From SegmenTron with Apache License 2.0 | 6 votes

def get_optimizer(model):
    parameters = _get_paramters(model)
    opt_lower = cfg.SOLVER.OPTIMIZER.lower()

    if opt_lower == 'sgd':
        optimizer = optim.SGD(
            parameters,
            lr=cfg.SOLVER.LR,
            momentum=cfg.SOLVER.MOMENTUM,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(
            parameters,
            lr=cfg.SOLVER.LR,
            eps=cfg.SOLVER.EPSILON,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    elif opt_lower == 'adadelta':
        optimizer = optim.Adadelta(
            parameters,
            lr=cfg.SOLVER.LR,
            eps=cfg.SOLVER.EPSILON,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    elif opt_lower == 'rmsprop':
        optimizer = optim.RMSprop(
            parameters,
            lr=cfg.SOLVER.LR,
            alpha=0.9,
            eps=cfg.SOLVER.EPSILON,
            momentum=cfg.SOLVER.MOMENTUM,
            weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    else:
        raise ValueError("Expected optimizer method in [sgd, adam, adadelta, rmsprop], "
                         "but received {}".format(opt_lower))
    return optimizer
Example #7
Source File: finetune_office31.py From transferlearning with MIT License | 6 votes

def get_optimizer(model_name):
    learning_rate = LEARNING_RATE
    if model_name == 'alexnet':
        param_group = [
            {'params': model.features.parameters(), 'lr': learning_rate}]
        for i in range(6):
            param_group += [{'params': model.classifier[i].parameters(),
                             'lr': learning_rate}]
        param_group += [{'params': model.classifier[6].parameters(),
                         'lr': learning_rate * 10}]
    elif model_name == 'resnet':
        param_group = []
        for k, v in model.named_parameters():
            if 'fc' not in k:
                param_group += [{'params': v, 'lr': learning_rate}]
            else:
                param_group += [{'params': v, 'lr': learning_rate * 10}]

    optimizer = optim.SGD(param_group, momentum=MOMENTUM)
    return optimizer

# Schedule learning rate
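The pattern above — the base learning rate for the pretrained body and a 10x rate for the freshly initialized classifier — relies on torch.optim's per-parameter-group options: each dict in the list can override the defaults passed to the constructor. A minimal sketch of the same idea on a torchvision ResNet (the backbone choice and rates here are hypothetical, not the transferlearning project's code):

import torch.optim as optim
from torchvision import models

model = models.resnet18(pretrained=True)      # hypothetical pretrained backbone
base_lr = 0.001
param_groups = [
    # pretrained body: everything except the final fully-connected layer
    {'params': [p for n, p in model.named_parameters() if 'fc' not in n],
     'lr': base_lr},
    # freshly initialized head: learn 10x faster
    {'params': model.fc.parameters(), 'lr': base_lr * 10},
]
optimizer = optim.SGD(param_groups, momentum=0.9)

Because every group defines its own 'lr', no top-level lr needs to be passed to the constructor.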
Example #8
Source File: agent_task.py From ConvLab with MIT License | 6 votes

def __init__(self, model, corpus, args, name, tune_pi_only):
    self.model = model
    self.corpus = corpus
    self.args = args
    self.name = name
    self.raw_goal = None
    self.vec_goals_list = None
    self.logprobs = None
    print("Do we only tune the policy: {}".format(tune_pi_only))
    self.opt = optim.SGD(
        [p for n, p in self.model.named_parameters()
         if 'c2z' in n or not tune_pi_only],
        lr=self.args.rl_lr,
        momentum=self.args.momentum,
        nesterov=(self.args.nesterov and self.args.momentum > 0))
    # self.opt = optim.Adam(self.model.parameters(), lr=0.01)
    # self.opt = optim.RMSprop(self.model.parameters(), lr=0.0005)
    self.all_rewards = []
    self.all_grads = []
    self.model.train()
Example #9
Source File: model_torch.py From machine-learning-for-programming-samples with MIT License | 6 votes

def _make_optimizer(self):
    if self.optimizer is not None:
        return
    # Also prepare optimizer:
    optimizer_name = self.hyperparameters["optimizer"].lower()
    if optimizer_name == "sgd":
        self.optimizer = optim.SGD(
            params=self.parameters(),
            lr=self.hyperparameters["learning_rate"],
            momentum=self.hyperparameters["momentum"],
        )
    elif optimizer_name == "rmsprop":
        self.optimizer = optim.RMSprop(
            params=self.parameters(),
            lr=self.hyperparameters["learning_rate"],
            alpha=self.hyperparameters["learning_rate_decay"],
            momentum=self.hyperparameters["momentum"],
        )
    elif optimizer_name == "adam":
        self.optimizer = optim.Adam(
            params=self.parameters(),
            lr=self.hyperparameters["learning_rate"],
        )
    else:
        raise Exception('Unknown optimizer "%s".' % self.hyperparameters["optimizer"])
Example #10
Source File: task.py From cloudml-samples with Apache License 2.0 | 6 votes

def train_model(args):
    """Load the data, train the model, test the model, and export / save the model."""
    torch.manual_seed(args.seed)

    # Open our dataset
    train_loader, test_loader = data_utils.load_data(args.test_split,
                                                     args.batch_size)

    # Create the model
    net = model.SonarDNN().double()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, nesterov=False)

    # Train / Test the model
    for epoch in range(1, args.epochs + 1):
        train(net, train_loader, optimizer, epoch)
        test(net, test_loader)

    # Export the trained model
    torch.save(net.state_dict(), args.model_name)

    if args.model_dir:
        # Save the model to GCS
        data_utils.save_model(args.model_dir, args.model_name)
Example #11
Source File: pytorch_sample.py From mars with Apache License 2.0 | 5 votes

def main():
    import torch.nn as nn
    import torch.distributed as dist
    import torch.optim as optim
    import torch.utils.data

    dist.init_process_group(backend='gloo')
    torch.manual_seed(42)

    data = torch.rand((1000, 32), dtype=torch.float32)
    labels = torch.randint(1, (1000, 10), dtype=torch.float32)

    train_dataset = torch.utils.data.TensorDataset(data, labels)
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=32,
                                               shuffle=False,
                                               sampler=train_sampler)

    model = nn.parallel.DistributedDataParallel(get_model())
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    criterion = nn.BCELoss()

    for _ in range(2):  # 2 epochs
        for _, (batch_data, batch_labels) in enumerate(train_loader):
            outputs = model(batch_data)
            loss = criterion(outputs.squeeze(), batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
Example #12
Source File: dataset_sample.py From mars with Apache License 2.0 | 5 votes

def main():
    import torch.nn as nn
    import torch.distributed as dist
    import torch.optim as optim
    import torch.utils.data

    import mars.tensor as mt
    from mars.learn.contrib.pytorch import MarsDataset, MarsDistributedSampler

    dist.init_process_group(backend='gloo')
    torch.manual_seed(42)

    data = mt.named_tensor(name='data')
    labels = mt.named_tensor(name='labels')
    train_dataset = MarsDataset(data, labels)
    train_sampler = MarsDistributedSampler(train_dataset)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=32,
                                               shuffle=False,
                                               sampler=train_sampler)

    model = nn.parallel.DistributedDataParallel(get_model())
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    criterion = nn.BCELoss()

    for _ in range(2):  # 2 epochs
        for _, (batch_data, batch_labels) in enumerate(train_loader):
            outputs = model(batch_data)
            loss = criterion(outputs.squeeze(), batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
Example #13
Source File: utils_cifar.py From pytorch-i-revnet with MIT License | 5 votes

def train(model, trainloader, trainset, epoch, num_epochs, batch_size, lr,
          use_cuda, in_shape):
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    optimizer = optim.SGD(model.parameters(), lr=learning_rate(lr, epoch),
                          momentum=0.9, weight_decay=5e-4)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('| Number of Trainable Parameters: ' + str(params))
    print('\n=> Training Epoch #%d, LR=%.4f' % (epoch, learning_rate(lr, epoch)))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        out, out_bij = model(inputs)    # Forward Propagation
        loss = criterion(out, targets)  # Loss
        loss.backward()                 # Backward Propagation
        optimizer.step()                # Optimizer update

        try:
            loss.data[0]
        except IndexError:
            loss.data = torch.reshape(loss.data, (1,))
        train_loss += loss.data[0]
        _, predicted = torch.max(out.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                         % (epoch, num_epochs, batch_idx + 1,
                            (len(trainset) // batch_size) + 1,
                            loss.data[0], 100. * correct / total))
        sys.stdout.flush()
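This function rebuilds the optimizer at every epoch just to apply a new learning rate. An in-place alternative (a sketch, not part of pytorch-i-revnet) is to mutate the existing optimizer's param_groups, which is the same mechanism torch.optim.lr_scheduler uses internally:

# Hypothetical helper: change the learning rate of an existing optimizer
# instead of constructing a new one each epoch.
def set_lr(optimizer, new_lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

Re-creating the optimizer also discards state such as momentum buffers; mutating param_groups keeps them.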
Example #14
Source File: task.py From cloudml-samples with Apache License 2.0 | 5 votes

def train_model(args):
    """Load the data, train the model, test the model, and export / save the model."""
    torch.manual_seed(args.seed)

    # Download the dataset
    data_utils.download_data()

    # Open our dataset
    train_loader, test_loader = data_utils.load_data(args.test_split,
                                                     args.batch_size)

    # Create the model
    net = model.SonarDNN().double()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=args.momentum, nesterov=False)

    # Train / Test the model
    for epoch in range(1, args.epochs + 1):
        train(net, train_loader, optimizer, epoch)
        test(net, test_loader)

    # Export the trained model
    torch.save(net.state_dict(), args.model_name)

    if args.model_dir:
        # Save the model to GCS
        data_utils.save_model(args.model_dir, args.model_name)
Example #15
Source File: model.py From graph_distillation with Apache License 2.0 | 5 votes

def __init__(self, modalities, n_classes, n_frames, n_channels, input_sizes,
             hidden_size, n_layers, dropout, lr, lr_decay_rate, ckpt_path,
             w_losses, w_modalities, metric, xfer_to, gd_size, gd_reg):
    super(GraphDistillation, self).__init__(
        modalities, n_classes, n_frames, n_channels, input_sizes, hidden_size,
        n_layers, dropout, lr, lr_decay_rate, ckpt_path)

    # Index of the modality to distill
    to_idx = self.modalities.index(xfer_to)
    from_idx = [x for x in range(len(self.modalities)) if x != to_idx]
    assert len(from_idx) >= 1

    # Prior
    w_modalities = [w_modalities[i] for i in from_idx]  # remove modality being transferred to
    gd_prior = utils.softmax(w_modalities, 0.25)

    # Distillation model
    self.distillation_kernel = get_distillation_kernel(
        n_classes, hidden_size, gd_size, to_idx, from_idx, gd_prior, gd_reg,
        w_losses, metric).cuda()

    params = list(self.embeds[to_idx].parameters()) + \
             list(self.distillation_kernel.parameters())
    self.optimizer = optim.SGD(params, lr=lr, momentum=0.9, weight_decay=5e-4)

    self.xfer_to = xfer_to
    self.to_idx = to_idx
    self.from_idx = from_idx
Example #16
Source File: model.py From graph_distillation with Apache License 2.0 | 5 votes

def __init__(self, *args, **kwargs):
    super(SingleStream, self).__init__(*args, **kwargs)
    assert len(self.embeds) == 1

    self.optimizer = optim.SGD(
        self.embeds[0].parameters(), lr=self.lr, momentum=0.9,
        weight_decay=5e-4)
    self.to_idx = 0
Example #17
Source File: model.py From OpenQA with MIT License | 5 votes

def init_optimizer(self, state_dict=None):
    """Initialize an optimizer for the free parameters of the network.

    Args:
        state_dict: network parameters
    """
    logger.info("init_optimizer")
    if self.args.fix_embeddings:
        for p in self.network.embedding.parameters():
            p.requires_grad = False
        for p in self.selector.embedding.parameters():
            p.requires_grad = False
    parameters = [p for p in self.network.parameters() if p.requires_grad]
    parameters = parameters + [p for p in self.selector.parameters()
                               if p.requires_grad]
    if self.args.optimizer == 'sgd':
        self.optimizer = optim.SGD(parameters, self.args.learning_rate,
                                   momentum=self.args.momentum,
                                   weight_decay=self.args.weight_decay)
    elif self.args.optimizer == 'adamax':
        self.optimizer = optim.Adamax(parameters,
                                      weight_decay=self.args.weight_decay)
    else:
        raise RuntimeError('Unsupported optimizer: %s' % self.args.optimizer)

# --------------------------------------------------------------------------
# Learning
# --------------------------------------------------------------------------
Example #18
Source File: model.py From graph_distillation with Apache License 2.0 | 5 votes

def __init__(self, modalities, n_classes, n_frames, n_channels, input_sizes,
             hidden_size, n_layers, dropout, hidden_size_seq, n_layers_seq,
             dropout_seq, bg_w, lr, lr_decay_rate, to_idx, ckpt_path,
             w_losses, w_modalities, metric, xfer_to, gd_size, gd_reg):
    super(GraphDistillation, self).__init__(
        modalities, n_classes, n_frames, n_channels, input_sizes, hidden_size,
        n_layers, dropout, hidden_size_seq, n_layers_seq, dropout_seq, bg_w,
        lr, lr_decay_rate, to_idx, ckpt_path)

    # Index of the modality to distill
    to_idx = self.modalities.index(xfer_to)
    from_idx = [x for x in range(len(self.modalities)) if x != to_idx]
    assert len(from_idx) >= 1

    # Prior
    w_modalities = [w_modalities[i] for i in from_idx]  # remove modality being transferred to
    gd_prior = utils.softmax(w_modalities, 0.25)

    # Distillation model
    self.distillation_kernel = \
        get_distillation_kernel(n_classes + 1, hidden_size, gd_size, to_idx,
                                from_idx, gd_prior, gd_reg, w_losses,
                                metric).cuda()

    # Add optimizer to self.optimizers
    gd_optimizer = optim.SGD(
        self.distillation_kernel.parameters(),
        lr=lr,
        momentum=0.9,
        weight_decay=5e-4)
    self.optimizers.append(gd_optimizer)
    self.lr_decay_rates.append(lr_decay_rate)

    self.xfer_to = xfer_to
    self.to_idx = to_idx
    self.from_idx = from_idx
Example #19
Source File: wrapper.py From metalearn-leap with Apache License 2.0 | 5 votes

def __init__(self, model, optimizer_cls, meta_optimizer_cls, optimizer_kwargs,
             meta_optimizer_kwargs, meta_kwargs, criterion):
    super(LeapWrapper, self).__init__(criterion, model, optimizer_cls,
                                      optimizer_kwargs)
    self.meta = Leap(model, **meta_kwargs)
    self.meta_optimizer_cls = \
        optim.SGD if meta_optimizer_cls.lower() == 'sgd' else optim.Adam
    self.meta_optimizer = self.meta_optimizer_cls(self.meta.parameters(),
                                                  **meta_optimizer_kwargs)
Example #20
Source File: task.py From cloudml-samples with Apache License 2.0 | 5 votes

def get_args():
    """Argument parser.

    Returns:
      Dictionary of arguments.
    """
    parser = argparse.ArgumentParser(description='PyTorch Sonar Example')
    parser.add_argument('--model-dir', type=str,
                        help='Where to save the model')
    parser.add_argument('--model-name', type=str, default='sonar_model',
                        help='What to name the saved model file')
    parser.add_argument('--batch-size', type=int, default=4,
                        help='input batch size for training (default: 4)')
    parser.add_argument('--test-split', type=float, default=0.2,
                        help='split size for training / testing dataset')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5,
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--seed', type=int, default=42,
                        help='random seed (default: 42)')
    args = parser.parse_args()
    return args
Example #21
Source File: train.py From EMANet with GNU General Public License v3.0 | 5 votes

def __init__(self, dt_split):
    torch.manual_seed(66)
    torch.cuda.manual_seed_all(66)
    torch.cuda.set_device(settings.DEVICE)

    self.log_dir = settings.LOG_DIR
    self.model_dir = settings.MODEL_DIR
    ensure_dir(self.log_dir)
    ensure_dir(self.model_dir)
    logger.info('set log dir as %s' % self.log_dir)
    logger.info('set model dir as %s' % self.model_dir)

    self.step = 1
    self.writer = SummaryWriter(osp.join(self.log_dir, 'train.events'))
    dataset = TrainDataset(split=dt_split)
    self.dataloader = DataLoader(
        dataset, batch_size=settings.BATCH_SIZE, pin_memory=True,
        num_workers=settings.NUM_WORKERS, shuffle=True, drop_last=True)

    self.net = EMANet(settings.N_CLASSES, settings.N_LAYERS).cuda()
    self.opt = SGD(
        params=[
            {
                'params': get_params(self.net, key='1x'),
                'lr': 1 * settings.LR,
                'weight_decay': settings.WEIGHT_DECAY,
            },
            {
                'params': get_params(self.net, key='1y'),
                'lr': 1 * settings.LR,
                'weight_decay': 0,
            },
            {
                'params': get_params(self.net, key='2x'),
                'lr': 2 * settings.LR,
                'weight_decay': 0.0,
            }],
        momentum=settings.LR_MOM)

    self.net = DataParallel(self.net, device_ids=settings.DEVICES)
    patch_replication_callback(self.net)
Example #22
Source File: optimizers_lib.py From bgd with MIT License | 5 votes

def sgd(model, **kwargs):
    logger = kwargs.get("logger", None)
    assert logger is not None
    sgd_params = {
        "momentum": kwargs.get("momentum", 0.9),
        "lr": kwargs.get("lr", 0.1),
        "weight_decay": kwargs.get("weight_decay", 5e-4)
    }
    logger.info("SGD params: " + str(sgd_params))
    all_params = [{'params': params, 'name': name,
                   'initial_lr': kwargs.get("lr", 0.1)}
                  for name, params in model.named_parameters()]
    return optim.SGD(all_params, **sgd_params)
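Extra keys such as 'name' and 'initial_lr' in a parameter group are allowed: torch.optim only fills in missing defaults and does not reject keys it does not recognize. 'initial_lr' is the key PyTorch's LR schedulers look for when constructed with last_epoch != -1 (i.e., when resuming mid-run). A sketch under those assumptions; the model and step numbers are hypothetical:

import torch.nn as nn
import torch.optim as optim

# Hypothetical tiny model, just to have named parameters to group.
model = nn.Linear(4, 2)
all_params = [{'params': p, 'name': n, 'initial_lr': 0.1}
              for n, p in model.named_parameters()]
optimizer = optim.SGD(all_params, lr=0.1, momentum=0.9, weight_decay=5e-4)

# Because 'initial_lr' is present in every group, the scheduler can be
# created as if 60 epochs had already elapsed.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1,
                                      last_epoch=59)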
Example #23
Source File: Optim.py From video-caption-openNMT.pytorch with MIT License | 5 votes

def set_parameters(self, params):
    self.params = []
    self.sparse_params = []
    for k, p in params:
        if p.requires_grad:
            if self.method != 'sparseadam' or "embed" not in k:
                self.params.append(p)
            else:
                self.sparse_params.append(p)
    if self.method == 'sgd':
        self.optimizer = optim.SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = optim.Adagrad(self.params, lr=self.lr)
        for group in self.optimizer.param_groups:
            for p in group['params']:
                self.optimizer.state[p]['sum'] = self.optimizer\
                    .state[p]['sum'].fill_(self.adagrad_accum)
    elif self.method == 'adadelta':
        self.optimizer = optim.Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = optim.Adam(self.params, lr=self.lr,
                                    betas=self.betas, eps=1e-9)
    elif self.method == 'sparseadam':
        self.optimizer = MultipleOptimizer(
            [optim.Adam(self.params, lr=self.lr,
                        betas=self.betas, eps=1e-8),
             optim.SparseAdam(self.sparse_params, lr=self.lr,
                              betas=self.betas, eps=1e-8)])
    else:
        raise RuntimeError("Invalid optim method: " + self.method)
Example #24
Source File: train_detection.py From seamseg with BSD 3-Clause "New" or "Revised" License | 5 votes

def make_optimizer(config, model, epoch_length):
    body_config = config["body"]
    opt_config = config["optimizer"]
    sch_config = config["scheduler"]

    # Gather parameters from the network
    norm_parameters = []
    other_parameters = []
    for m in model.modules():
        if any(isinstance(m, layer) for layer in NORM_LAYERS):
            norm_parameters += [p for p in m.parameters() if p.requires_grad]
        elif any(isinstance(m, layer) for layer in OTHER_LAYERS):
            other_parameters += [p for p in m.parameters() if p.requires_grad]

    assert len(norm_parameters) + len(other_parameters) == \
        len([p for p in model.parameters() if p.requires_grad]), \
        "Not all parameters that require grad are accounted for in the optimizer"

    # Set-up optimizer hyper-parameters
    parameters = [
        {
            "params": norm_parameters,
            "lr": opt_config.getfloat("lr") if not body_config.getboolean("bn_frozen") else 0.,
            "weight_decay": opt_config.getfloat("weight_decay") if opt_config.getboolean("weight_decay_norm") else 0.
        },
        {
            "params": other_parameters,
            "lr": opt_config.getfloat("lr"),
            "weight_decay": opt_config.getfloat("weight_decay")
        }
    ]

    optimizer = optim.SGD(
        parameters,
        momentum=opt_config.getfloat("momentum"),
        nesterov=opt_config.getboolean("nesterov"))

    scheduler = scheduler_from_config(sch_config, optimizer, epoch_length)

    assert sch_config["update_mode"] in ("batch", "epoch")
    batch_update = sch_config["update_mode"] == "batch"
    total_epochs = sch_config.getint("epochs")

    return optimizer, scheduler, batch_update, total_epochs
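Zeroing weight decay on normalization parameters, as this function can do via weight_decay_norm, is a common trick: decaying BatchNorm scales and biases often hurts accuracy. A generic, self-contained sketch of the same split (NORM_LAYERS here is a stand-in for seamseg's project-specific constant, and the model is a placeholder):

import torch.nn as nn
import torch.optim as optim

# Stand-in for seamseg's project-specific NORM_LAYERS constant.
NORM_LAYERS = (nn.BatchNorm1d, nn.BatchNorm2d, nn.GroupNorm)

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
norm_params, other_params = [], []
for m in model.modules():
    # recurse=False keeps each parameter in exactly one bucket
    target = norm_params if isinstance(m, NORM_LAYERS) else other_params
    target += [p for p in m.parameters(recurse=False) if p.requires_grad]

optimizer = optim.SGD([
    {'params': norm_params, 'weight_decay': 0.0},   # no decay on norm scales
    {'params': other_params, 'weight_decay': 1e-4},
], lr=0.01, momentum=0.9)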
Example #25
Source File: train_instance_seg.py From seamseg with BSD 3-Clause "New" or "Revised" License | 5 votes

def make_optimizer(config, model, epoch_length):
    body_config = config["body"]
    opt_config = config["optimizer"]
    sch_config = config["scheduler"]

    # Gather parameters from the network
    norm_parameters = []
    other_parameters = []
    for m in model.modules():
        if any(isinstance(m, layer) for layer in NORM_LAYERS):
            norm_parameters += [p for p in m.parameters() if p.requires_grad]
        elif any(isinstance(m, layer) for layer in OTHER_LAYERS):
            other_parameters += [p for p in m.parameters() if p.requires_grad]

    assert len(norm_parameters) + len(other_parameters) == \
        len([p for p in model.parameters() if p.requires_grad]), \
        "Not all parameters that require grad are accounted for in the optimizer"

    # Set-up optimizer hyper-parameters
    parameters = [
        {
            "params": norm_parameters,
            "lr": opt_config.getfloat("lr") if not body_config.getboolean("bn_frozen") else 0.,
            "weight_decay": opt_config.getfloat("weight_decay") if opt_config.getboolean("weight_decay_norm") else 0.
        },
        {
            "params": other_parameters,
            "lr": opt_config.getfloat("lr"),
            "weight_decay": opt_config.getfloat("weight_decay")
        }
    ]

    optimizer = optim.SGD(
        parameters,
        momentum=opt_config.getfloat("momentum"),
        nesterov=opt_config.getboolean("nesterov"))

    scheduler = scheduler_from_config(sch_config, optimizer, epoch_length)

    assert sch_config["update_mode"] in ("batch", "epoch")
    batch_update = sch_config["update_mode"] == "batch"
    total_epochs = sch_config.getint("epochs")

    return optimizer, scheduler, batch_update, total_epochs
Example #26
Source File: train_panoptic.py From seamseg with BSD 3-Clause "New" or "Revised" License | 5 votes

def make_optimizer(config, model, epoch_length):
    body_config = config["body"]
    opt_config = config["optimizer"]
    sch_config = config["scheduler"]

    # Gather parameters from the network
    norm_parameters = []
    other_parameters = []
    for m in model.modules():
        if any(isinstance(m, layer) for layer in NORM_LAYERS):
            norm_parameters += [p for p in m.parameters() if p.requires_grad]
        elif any(isinstance(m, layer) for layer in OTHER_LAYERS):
            other_parameters += [p for p in m.parameters() if p.requires_grad]

    assert len(norm_parameters) + len(other_parameters) == \
        len([p for p in model.parameters() if p.requires_grad]), \
        "Not all parameters that require grad are accounted for in the optimizer"

    # Set-up optimizer hyper-parameters
    parameters = [
        {
            "params": norm_parameters,
            "lr": opt_config.getfloat("lr") if not body_config.getboolean("bn_frozen") else 0.,
            "weight_decay": opt_config.getfloat("weight_decay") if opt_config.getboolean("weight_decay_norm") else 0.
        },
        {
            "params": other_parameters,
            "lr": opt_config.getfloat("lr"),
            "weight_decay": opt_config.getfloat("weight_decay")
        }
    ]

    optimizer = optim.SGD(
        parameters,
        momentum=opt_config.getfloat("momentum"),
        nesterov=opt_config.getboolean("nesterov"))

    scheduler = scheduler_from_config(sch_config, optimizer, epoch_length)

    assert sch_config["update_mode"] in ("batch", "epoch")
    batch_update = sch_config["update_mode"] == "batch"
    total_epochs = sch_config.getint("epochs")

    return optimizer, scheduler, batch_update, total_epochs