Python torch.optim.LBFGS Examples
The following are 30 code examples of torch.optim.LBFGS(). Each example notes its source file, project, and license. You may also want to check out the other functions and classes of the torch.optim module.
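Before the project examples, here is a minimal, self-contained sketch of the closure pattern that every torch.optim.LBFGS example below relies on: because L-BFGS may evaluate the objective several times per step, the forward and backward passes must live in a closure passed to optimizer.step(). The toy problem and all names in it are illustrative, not taken from any project on this page.

import torch
from torch import nn, optim

# Illustrative toy data for y = 2x + 1.
x = torch.linspace(-1, 1, 100).unsqueeze(1)
y = 2 * x + 1

model = nn.Linear(1, 1)
optimizer = optim.LBFGS(model.parameters(), lr=1.0)

def closure():
    # L-BFGS may call this several times per step, so all the work
    # (zeroing grads, forward pass, backward pass) happens here.
    optimizer.zero_grad()
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()
    return loss

for _ in range(10):
    loss = optimizer.step(closure)  # returns the closure's loss

print(loss.item())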
Example #1
Source File: training.py From learning-circuits with Apache License 2.0
def polish(self, nmaxsteps=50, patience=5, threshold=1e-10, save_to_self_model=False):
    if not save_to_self_model:
        model_bak = self.model
        self.model = copy.deepcopy(self.model)
    self.freeze()
    optimizer = optim.LBFGS(filter(lambda p: p.requires_grad, self.model.parameters()))
    def closure():
        optimizer.zero_grad()
        loss = self.loss()
        loss.backward()
        return loss
    n_bad_steps = 0
    best_loss = float('inf')
    for i in range(nmaxsteps):
        loss = optimizer.step(closure)
        if loss.item() < best_loss - threshold:
            best_loss = loss.item()
            n_bad_steps = 0
        else:
            n_bad_steps += 1
        if n_bad_steps > patience:
            break
    if not save_to_self_model:
        self.model = model_bak
    return loss.item()
Example #2
Source File: learning_fft_old.py From learning-circuits with Apache License 2.0
def polish_fft_blockperm_transpose(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    perm = model[1].argmax()
    polished_model = Block2x2DiagProduct(size=config['size'], complex=True, decreasing_size=False)
    polished_model.load_state_dict(model[0].state_dict())
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model(trainable.input)[:, perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model(trainable.input)[:, perm], trainable.target_matrix)
    return loss.item()
Example #3
Source File: neural_style.py From neural-style-pt with MIT License
def setup_optimizer(img):
    if params.optimizer == 'lbfgs':
        print("Running optimization with L-BFGS")
        optim_state = {
            'max_iter': params.num_iterations,
            'tolerance_change': -1,
            'tolerance_grad': -1,
        }
        if params.lbfgs_num_correction != 100:
            optim_state['history_size'] = params.lbfgs_num_correction
        optimizer = optim.LBFGS([img], **optim_state)
        loopVal = 1
    elif params.optimizer == 'adam':
        print("Running optimization with ADAM")
        optimizer = optim.Adam([img], lr=params.learning_rate)
        loopVal = params.num_iterations - 1
    return optimizer, loopVal
Example #4
Source File: learning_fft_old.py From learning-circuits with Apache License 2.0
def polish_fft_blockperm(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    perm = model[0].argmax()
    polished_model = Block2x2DiagProduct(size=config['size'], complex=True)
    polished_model.load_state_dict(model[1].state_dict())
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model(trainable.input[:, perm]), trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model(trainable.input[:, perm]), trainable.target_matrix)
    return loss.item()
Example #5
Source File: learning_fft_old.py From learning-circuits with Apache License 2.0
def polished_loss_fft_learn_perm(trainable):
    model = trainable.model
    polished_model = ButterflyProduct(size=model.size, complex=model.complex, fixed_order=True)
    temperature = 1.0 / (0.3 * trainable._iteration + 1)
    trainable.perm = torch.argmax(sinkhorn(model.perm_logit / temperature), dim=1)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    preopt_loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS_VALIDATION):
        optimizer.step(closure)
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
    # return loss.item() if not torch.isnan(loss) else preopt_loss.item() if not torch.isnan(preopt_loss) else float('inf')
    return loss.item() if not torch.isnan(loss) else preopt_loss.item() if not torch.isnan(preopt_loss) else 9999.0
Example #6
Source File: calibration.py From incremental_learning.pytorch with MIT License
def calibrate(network, loader, device, indexes, calibration_type="linear"):
    """Corrects the bias for new classes.

    :param network: The logits extractor model, usually convnet + FC w/o final activation.
    :param loader: The validation data loader.
    :param device: Device on which to apply the computation.
    :param indexes: A list of tuples, each made of a starting and an ending index.
                    They delimit on which range of targets to apply the calibration.
                    If given several tuples, different models will be used per range.
    :return: A wrapper `CalibrationWrapper`.
    """
    logits, labels = _extract_data(network, loader, device)

    calibration_wrapper = _get_calibration_model(indexes, calibration_type).to(device)

    def eval():
        corrected_logits = calibration_wrapper(logits)
        loss = F.cross_entropy(corrected_logits, labels)
        loss.backward()
        return loss

    optimizer = optim.LBFGS(calibration_wrapper.parameters(), lr=0.01, max_iter=50)
    optimizer.step(eval)

    return calibration_wrapper
Example #7
Source File: pytorch_matplotlib.py From trains with Apache License 2.0
def get_input_optimizer(input_img):
    # this line to show that input is a parameter that requires a gradient
    optimizer = optim.LBFGS([input_img.requires_grad_()])
    return optimizer

######################################################################
# Finally, we must define a function that performs the neural transfer. For
# each iteration of the networks, it is fed an updated input and computes
# new losses. We will run the ``backward`` methods of each loss module to
# dynamically compute their gradients. The optimizer requires a "closure"
# function, which reevaluates the module and returns the loss.
#
# We still have one final constraint to address. The network may try to
# optimize the input with values that exceed the 0 to 1 tensor range for
# the image. We can address this by correcting the input values to be
# between 0 and 1 each time the network is run.
#
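The tutorial comment above describes an optimization loop whose closure clamps the image back into the [0, 1] range and re-computes the style and content losses on every evaluation. Below is a hedged sketch of such a loop; run_style_transfer, num_steps, and the weight parameters are placeholder names, and it assumes (as the comment says) that running model(input_img) populates each loss module's .loss attribute.

def run_style_transfer(model, style_losses, content_losses, input_img,
                       num_steps=300, style_weight=1e6, content_weight=1):
    # Hypothetical sketch of the loop the comment above describes.
    optimizer = get_input_optimizer(input_img)
    run = [0]
    while run[0] <= num_steps:
        def closure():
            input_img.data.clamp_(0, 1)  # keep pixel values in [0, 1]
            optimizer.zero_grad()
            model(input_img)  # forward pass fills in each loss module's .loss
            style_score = sum(sl.loss for sl in style_losses)
            content_score = sum(cl.loss for cl in content_losses)
            loss = style_weight * style_score + content_weight * content_score
            loss.backward()
            run[0] += 1
            return loss
        optimizer.step(closure)
    input_img.data.clamp_(0, 1)  # final correction
    return input_img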
Example #8
Source File: torch_utils.py From pytorch-trpo with MIT License
def fit(self, observations, labels):
    def closure():
        predicted = self.predict(observations)
        loss = self.loss_fn(predicted, labels)
        self.optimizer.zero_grad()
        loss.backward()
        return loss
    old_params = parameters_to_vector(self.model.parameters())
    for lr in self.lr * .5**np.arange(10):
        self.optimizer = optim.LBFGS(self.model.parameters(), lr=lr)
        self.optimizer.step(closure)
        current_params = parameters_to_vector(self.model.parameters())
        if any(np.isnan(current_params.data.cpu().numpy())):
            print("LBFGS optimization diverged. Rolling back update...")
            vector_to_parameters(old_params, self.model.parameters())
        else:
            return
Example #9
Source File: model_optimizers.py From pytorch-lightning with Apache License 2.0
def configure_optimizers__lbfgs(self):
    """
    return whatever optimizers we want here.
    :return: list of optimizers
    """
    optimizer = optim.LBFGS(self.parameters(), lr=self.learning_rate)
    return optimizer
Example #10
Source File: learning_circulant.py From learning-circuits with Apache License 2.0
def polish_dct_complex(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm, 0], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm, 0], trainable.target_matrix)
    return loss.item()
Example #11
Source File: learning_circulant.py From learning-circuits with Apache License 2.0
def polish_dct_real(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
    return loss.item()
Example #12
Source File: learning_fft.py From learning-circuits with Apache License 2.0
def polish_fft_learn_perm(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    temperature = 1.0 / (0.3 * trainable._iteration + 1)
    trainable.perm = torch.argmax(sinkhorn(model.perm_logit / temperature), dim=1)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
    return loss.item()
Example #13
Source File: learning_fft.py From learning-circuits with Apache License 2.0
def polish_fft(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.br_perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.br_perm], trainable.target_matrix)
    return loss.item()
Example #14
Source File: train.py From SGC with MIT License
def train_linear(model, feat_dict, weight_decay, binary=False):
    if not binary:
        act = partial(F.log_softmax, dim=1)
        criterion = F.nll_loss
    else:
        act = torch.sigmoid
        criterion = F.binary_cross_entropy
    optimizer = optim.LBFGS(model.parameters())
    best_val_loss = float('inf')
    best_val_acc = 0
    plateau = 0
    start = time.perf_counter()
    for epoch in range(args.epochs):
        def closure():
            optimizer.zero_grad()
            output = model(feat_dict["train"].cuda()).squeeze()
            # L2 regularization applied explicitly, since LBFGS has no weight_decay option
            l2_reg = 0.5 * weight_decay * (model.W.weight ** 2).sum()
            loss = criterion(act(output), label_dict["train"].cuda()) + l2_reg
            loss.backward()
            return loss
        optimizer.step(closure)
    train_time = time.perf_counter() - start
    val_res = eval_linear(model, feat_dict["val"].cuda(), label_dict["val"].cuda(), binary)
    return val_res['accuracy'], model, train_time
Example #15
Source File: reddit.py From SGC with MIT License
def train_regression(model, train_features, train_labels, epochs):
    optimizer = optim.LBFGS(model.parameters(), lr=1)
    model.train()
    def closure():
        optimizer.zero_grad()
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        return loss_train
    t = perf_counter()
    for epoch in range(epochs):
        loss_train = optimizer.step(closure)
    train_time = perf_counter() - t
    return model, train_time
Example #16
Source File: polish.py From learning-circuits with Apache License 2.0
def polish_fft(trial):
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        # print(maxes)
        # if torch.all(maxes >= 0.99):
        polished_model.butterflies = nn.ModuleList([model.butterflies[argmax] for argmax in argmaxes])
        # else:
        #     return -trial.last_result['negative_loss']
    else:
        polished_model.butterflies = model.butterflies
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.br_perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.br_perm], trainable.target_matrix)
    return loss.item()
Example #17
Source File: polish.py From learning-circuits with Apache License 2.0
def polish_hadamard(trial):
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        # print(maxes)
        # if torch.all(maxes >= 0.99):
        polished_model.butterflies = nn.ModuleList([model.butterflies[argmax] for argmax in argmaxes])
        # else:
        #     return -trial.last_result['negative_loss']
    else:
        polished_model.butterflies = model.butterflies
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix(), trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix(), trainable.target_matrix)
    return loss.item()
Example #18
Source File: optimizer.py From XenonPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, *, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-5,
             tolerance_change=1e-9, history_size=100, line_search_fn=None):
    """Implements L-BFGS algorithm.

    .. warning::
        This optimizer doesn't support per-parameter options and
        parameter groups (there can be only one).

    .. warning::
        Right now all parameters have to be on a single device.
        This will be improved in the future.

    .. note::
        This is a very memory intensive optimizer (it requires additional
        ``param_bytes * (history_size + 1)`` bytes). If it doesn't fit in memory
        try reducing the history size, or use a different algorithm.

    Arguments:
        lr (float): learning rate (default: 1)
        max_iter (int): maximal number of iterations per optimization step
            (default: 20)
        max_eval (int): maximal number of function evaluations per optimization
            step (default: max_iter * 1.25).
        tolerance_grad (float): termination tolerance on first order optimality
            (default: 1e-5).
        tolerance_change (float): termination tolerance on function
            value/parameter changes (default: 1e-9).
        history_size (int): update history size (default: 100).
    """
    super().__init__(optim.LBFGS, lr=lr, max_iter=max_iter, max_eval=max_eval,
                     tolerance_grad=tolerance_grad, tolerance_change=tolerance_change,
                     history_size=history_size, line_search_fn=line_search_fn)
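The wrapper above passes its arguments straight through to torch.optim.LBFGS. For reference, a minimal sketch of constructing the optimizer directly with the same arguments; the model is a placeholder, and 'strong_wolfe' is the only line-search option PyTorch's LBFGS accepts besides None:

import torch
from torch import nn, optim

model = nn.Linear(4, 1)  # placeholder model
optimizer = optim.LBFGS(
    model.parameters(),
    lr=1.0,
    max_iter=20,
    max_eval=None,  # defaults to max_iter * 1.25
    tolerance_grad=1e-5,
    tolerance_change=1e-9,
    history_size=100,
    line_search_fn='strong_wolfe',  # or None for a fixed step size
)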
Example #19
Source File: learning_legendre.py From learning-circuits with Apache License 2.0
def polish_dct_real(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
    return loss.item()
Example #20
Source File: learning_vandermonde.py From learning-circuits with Apache License 2.0
def polish_dct_complex(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm, 0], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm, 0], trainable.target_matrix)
    return loss.item()
Example #21
Source File: learning_vandermonde.py From learning-circuits with Apache License 2.0
def polish_dct_real(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
    return loss.item()
Example #22
Source File: learning_fft_old.py From learning-circuits with Apache License 2.0
def polish_fft_learn_perm(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    temperature = 1.0 / (0.3 * trainable._iteration + 1)
    trainable.perm = torch.argmax(sinkhorn(model.perm_logit / temperature), dim=1)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.perm], trainable.target_matrix)
    return loss.item()
Example #23
Source File: learning_fft_old.py From learning-circuits with Apache License 2.0
def polish_fft(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.br_perm], trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix()[:, trainable.br_perm], trainable.target_matrix)
    return loss.item()
Example #24
Source File: learning_ops.py From learning-circuits with Apache License 2.0
def polish_ops(trial):
    """Load model from checkpoint, and re-optimize using L-BFGS to find
    the nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = HstackDiagProduct(size=config['size'])
    polished_model.factors = model.factors
    polished_model.P_init = model.P_init
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        eye = torch.eye(polished_model.size)
        x = (eye[:, :, None, None] * torch.eye(2)).unsqueeze(-1)
        y = polished_model(x[:, trainable.br_perm])
        loss = nn.functional.mse_loss(y, trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    eye = torch.eye(polished_model.size)
    x = (eye[:, :, None, None] * torch.eye(2)).unsqueeze(-1)
    y = polished_model(x[:, trainable.br_perm])
    loss = nn.functional.mse_loss(y, trainable.target_matrix)
    return loss.item()
Example #25
Source File: learning_hadamard.py From learning-circuits with Apache License 2.0
def polish_hadamard(trial):
    """Load model from checkpoint, then fix the order of the factor matrices
    (using the largest logits), and re-optimize using L-BFGS to find the
    nearest local optimum.
    """
    trainable = eval(trial.trainable_name)(trial.config)
    trainable.restore(str(Path(trial.logdir) / trial._checkpoint.value))
    model = trainable.model
    config = trial.config
    polished_model = ButterflyProduct(size=config['size'], complex=model.complex, fixed_order=True)
    if not model.fixed_order:
        prob = model.softmax_fn(model.logit)
        maxes, argmaxes = torch.max(prob, dim=-1)
        polished_model.factors = nn.ModuleList([model.factors[argmax] for argmax in argmaxes])
    else:
        polished_model.factors = model.factors
    optimizer = optim.LBFGS(polished_model.parameters())
    def closure():
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(polished_model.matrix(), trainable.target_matrix)
        loss.backward()
        return loss
    for i in range(N_LBFGS_STEPS):
        optimizer.step(closure)
    torch.save(polished_model.state_dict(),
               str((Path(trial.logdir) / trial._checkpoint.value).parent / 'polished_model.pth'))
    loss = nn.functional.mse_loss(polished_model.matrix(), trainable.target_matrix)
    return loss.item()
Example #26
Source File: train.py From Autoencoder with Apache License 2.0
def main():
    train_loader = DataLoader(dataset=VaeDataset('train'), batch_size=batch_size,
                              shuffle=True, pin_memory=True, drop_last=True)
    val_loader = DataLoader(dataset=VaeDataset('valid'), batch_size=batch_size,
                            shuffle=False, pin_memory=True, drop_last=True)

    # Create SegNet model
    label_nbr = 3
    model = SegNet(label_nbr)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [40, xxx] -> [10, ...], [10, ...], [10, ...], [10, ...] on 4 GPUs
        model = nn.DataParallel(model)
    # Use appropriate device
    model = model.to(device)
    # print(model)

    # define the optimizer
    # optimizer = optim.LBFGS(model.parameters(), lr=0.8)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_loss = 100000
    epochs_since_improvement = 0

    # Epochs
    for epoch in range(start_epoch, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs,
        # and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(optimizer, 0.8)

        # One epoch's training
        train(epoch, train_loader, model, optimizer)

        # One epoch's validation
        val_loss = valid(val_loader, model)
        print('\n * LOSS - {loss:.3f}\n'.format(loss=val_loss))

        # Check if there was an improvement
        is_best = val_loss < best_loss
        best_loss = min(best_loss, val_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer, val_loss, is_best)
Example #27
Source File: get_optimizer.py From PyMIC with Apache License 2.0
def get_optimiser(name, net_params, optim_params):
    lr = optim_params['learning_rate']
    momentum = optim_params['momentum']
    weight_decay = optim_params['weight_decay']
    if name == "SGD":
        return optim.SGD(net_params, lr, momentum=momentum, weight_decay=weight_decay)
    elif name == "Adam":
        return optim.Adam(net_params, lr, weight_decay=1e-5)
    elif name == "SparseAdam":
        return optim.SparseAdam(net_params, lr)
    elif name == "Adadelta":
        return optim.Adadelta(net_params, lr, weight_decay=weight_decay)
    elif name == "Adagrad":
        return optim.Adagrad(net_params, lr, weight_decay=weight_decay)
    elif name == "Adamax":
        return optim.Adamax(net_params, lr, weight_decay=weight_decay)
    elif name == "ASGD":
        return optim.ASGD(net_params, lr, weight_decay=weight_decay)
    elif name == "LBFGS":
        return optim.LBFGS(net_params, lr)
    elif name == "RMSprop":
        return optim.RMSprop(net_params, lr, momentum=momentum, weight_decay=weight_decay)
    elif name == "Rprop":
        return optim.Rprop(net_params, lr)
    else:
        raise ValueError("unsupported optimizer {0:}".format(name))
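A brief usage sketch for the factory above; the model and hyper-parameter values are illustrative only (note that learning_rate, momentum, and weight_decay must all be present in optim_params, even for optimizers such as LBFGS that ignore some of them):

import torch.nn as nn

model = nn.Linear(10, 2)  # placeholder model
optim_params = {'learning_rate': 1.0, 'momentum': 0.9, 'weight_decay': 1e-4}
optimizer = get_optimiser("LBFGS", model.parameters(), optim_params)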
Example #28
Source File: test_training.py From torch-kalman with MIT License
def _train_kf(self, data: torch.Tensor, num_epochs: int = 8, cls: Type['KalmanFilter'] = KalmanFilter):
    kf = cls(
        measures=['y'],
        processes=[
            LocalLevel(id='local_level').add_measure('y'),
            Season(id='day_in_week', seasonal_period=7, dt_unit='D').add_measure('y')
        ]
    )
    kf.opt = LBFGS(kf.parameters())

    start_datetimes = (
        np.zeros(self.config['num_groups'], dtype='timedelta64') + DEFAULT_START_DT
    )

    def closure():
        kf.opt.zero_grad()
        pred = kf(data, start_datetimes=start_datetimes)
        loss = -pred.log_prob(data).mean()
        loss.backward()
        return loss

    print(f"Will train for {num_epochs} epochs...")
    loss = float('nan')
    for i in range(num_epochs):
        new_loss = kf.opt.step(closure)
        print(f"EPOCH {i}, LOSS {new_loss.item()}, DELTA {loss - new_loss.item()}")
        loss = new_loss.item()

    return kf(data, start_datetimes=start_datetimes).predictions
Example #29
Source File: train.py From Neural-Style-Transfer-pytorch with MIT License
def main(args):
    content_img = args.content_img
    style_img = args.style_img
    size = args.size
    steps = args.steps
    c_weight = args.c_weight
    s_weight = args.s_weight

    content_img, style_img = loader(content_img, style_img, size=size)
    input_img = content_img.clone()  # just noise array is fine
    model, style_losses, content_losses = nst(content_img, style_img)
    optimizer = optim.LBFGS([input_img.requires_grad_()])

    step = [0]
    while step[0] <= steps:
        def closure():
            input_img.data.clamp_(0, 1)
            optimizer.zero_grad()
            output = model(input_img)
            cl = 0
            sl = 0
            for c_loss in content_losses:
                cl += c_loss.loss * c_weight
            for s_loss in style_losses:
                sl += s_loss.loss * s_weight
            loss = cl + sl
            loss.backward()
            if step[0] % 50 == 0:
                print('Step : {}'.format(step))
                print('Style Loss : {:3f} Content Loss: {:3f}'.format(sl.item(), cl.item()))
            step[0] += 1
            return loss
        optimizer.step(closure)
    input_img.data.clamp_(0, 1)
    return input_img

imshow(content_img, title='Input image')
plt.show()
Example #30
Source File: temperature_scaling.py From temperature_scaling with MIT License
def set_temperature(self, valid_loader):
    """
    Tune the temperature of the model (using the validation set).
    We're going to set it to optimize NLL.
    valid_loader (DataLoader): validation set loader
    """
    self.cuda()
    nll_criterion = nn.CrossEntropyLoss().cuda()
    ece_criterion = _ECELoss().cuda()

    # First: collect all the logits and labels for the validation set
    logits_list = []
    labels_list = []
    with torch.no_grad():
        for input, label in valid_loader:
            input = input.cuda()
            logits = self.model(input)
            logits_list.append(logits)
            labels_list.append(label)
        logits = torch.cat(logits_list).cuda()
        labels = torch.cat(labels_list).cuda()

    # Calculate NLL and ECE before temperature scaling
    before_temperature_nll = nll_criterion(logits, labels).item()
    before_temperature_ece = ece_criterion(logits, labels).item()
    print('Before temperature - NLL: %.3f, ECE: %.3f' % (before_temperature_nll, before_temperature_ece))

    # Next: optimize the temperature w.r.t. NLL
    optimizer = optim.LBFGS([self.temperature], lr=0.01, max_iter=50)

    def eval():
        loss = nll_criterion(self.temperature_scale(logits), labels)
        loss.backward()
        return loss
    optimizer.step(eval)

    # Calculate NLL and ECE after temperature scaling
    after_temperature_nll = nll_criterion(self.temperature_scale(logits), labels).item()
    after_temperature_ece = ece_criterion(self.temperature_scale(logits), labels).item()
    print('Optimal temperature: %.3f' % self.temperature.item())
    print('After temperature - NLL: %.3f, ECE: %.3f' % (after_temperature_nll, after_temperature_ece))

    return self