import warnings import numpy as np from torch.optim.lr_scheduler import ReduceLROnPlateau, _LRScheduler class GradualWarmupScheduler(_LRScheduler): """ Gradually warm-up(increasing) learning rate in optimizer. Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. From: https://github.com/ildoonet/pytorch-gradual-warmup-lr. Args: optimizer (Optimizer): Wrapped optimizer. multiplier: target learning rate = base lr * multiplier total_epoch: target learning rate is reached at total_epoch, gradually after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau) """ def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None, **kwargs): self.multiplier = multiplier if self.multiplier <= 1.: raise ValueError('multiplier should be greater than 1.') self.total_epoch = total_epoch self.after_scheduler = after_scheduler self.finished = False super().__init__(optimizer) def get_lr(self): if self.last_epoch > self.total_epoch: if self.after_scheduler: if not self.finished: self.after_scheduler.base_lrs = [ base_lr * self.multiplier for base_lr in self.base_lrs ] self.finished = True print("End of WarmUp.") return self.after_scheduler.get_lr() return [base_lr * self.multiplier for base_lr in self.base_lrs] return [ base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs ] def step_ReduceLROnPlateau(self, metrics, epoch=None): if epoch is None: epoch = self.last_epoch + 1 self.last_epoch = epoch if epoch != 0 else 1 # ReduceLROnPlateau is called at the end of epoch, whereas others are called at beginning if self.last_epoch <= self.total_epoch: warmup_lr = [ base_lr * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs ] for param_group, lr in zip(self.optimizer.param_groups, warmup_lr): param_group['lr'] = lr else: if epoch is None: self.after_scheduler.step(metrics, None) else: self.after_scheduler.step(metrics, epoch - self.total_epoch) def step(self, epoch=None, metrics=None): if type(self.after_scheduler) != ReduceLROnPlateau: if self.finished and self.after_scheduler: if epoch is None: self.after_scheduler.step(None) else: self.after_scheduler.step(epoch - self.total_epoch) else: return super(GradualWarmupScheduler, self).step(epoch) else: self.step_ReduceLROnPlateau(metrics, epoch) class CosineWithRestarts(_LRScheduler): # pylint: disable=protected-access """ Cosine annealing with restarts. This is decribed in the paper https://arxiv.org/abs/1608.03983. Taken from: https://github.com/allenai/allennlp Parameters ---------- optimizer : ``torch.optim.Optimizer`` t_max : ``int`` The maximum number of iterations within the first cycle. eta_min : ``float``, optional (default=0) The minimum learning rate. last_epoch : ``int``, optional (default=-1) The index of the last epoch. This is used when restarting. factor : ``float``, optional (default=1) The factor by which the cycle length (``T_max``) increases after each restart. """ def __init__( self, optimizer, t_max: int, eta_min: float = 0., last_epoch: int = -1, factor: float = 1. ) -> None: assert t_max > 0 assert eta_min >= 0 if t_max == 1 and factor == 1: warnings.warning( "Cosine annealing scheduler will have no effect on the learning " "rate since T_max = 1 and factor = 1." ) self.t_max = t_max self.eta_min = eta_min self.factor = factor self._last_restart: int = 0 self._cycle_counter: int = 0 self._cycle_factor: float = 1. self._updated_cycle_len: int = t_max self._initialized: bool = False super(CosineWithRestarts, self).__init__(optimizer, last_epoch) def get_lr(self): """Get updated learning rate.""" # HACK: We need to check if this is the first time ``self.get_lr()`` was called, # since ``torch.optim.lr_scheduler._LRScheduler`` will call ``self.get_lr()`` # when first initialized, but the learning rate should remain unchanged # for the first epoch. if not self._initialized: self._initialized = True return self.base_lrs step = self.last_epoch + 1 self._cycle_counter = step - self._last_restart lrs = [ self.eta_min + ((lr - self.eta_min) / 2) * ( np.cos( np.pi * (self._cycle_counter % self._updated_cycle_len) / self._updated_cycle_len ) + 1 ) for lr in self.base_lrs ] if self._cycle_counter % self._updated_cycle_len == 0: # Adjust the cycle length. self._cycle_factor *= self.factor self._cycle_counter = 0 self._updated_cycle_len = int(self._cycle_factor * self.t_max) self._last_restart = step return lrs