Python torch.optim.step() Examples

The following are 11 code examples of torch.optim.step(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.optim, or try the search function.

Example #1

Source File: amc.py From LeGR with Apache License 2.0

6 votes

def train(self, model, epochs, name):
        """Train ``model`` with SGD, evaluate every epoch and save the result.

        The model is stored on ``self.model``; each epoch runs
        ``self.train_epoch`` followed by ``self.test`` on ``self.test_loader``.
        The learning rate is multiplied by 0.2 at 30%, 60% and 80% of
        ``epochs``.  After the loop the model is evaluated once more and the
        whole module is written to ``./ckpt/<name>_final.t7``.

        Args:
            model: network to train; its parameters are handed to the optimizer.
            epochs: number of training epochs (0 skips training but still
                evaluates and saves).
            name: prefix used for the checkpoint file name.

        Returns:
            The first value returned by the final ``self.test`` call (printed
            as Top-1 during training).
        """
        import os  # local import: only needed for the checkpoint directory

        self.model = model

        # torch.save() does not create missing parent directories.  Create the
        # target directory before training so a long run cannot fail on the
        # very last statement.
        ckpt_path = './ckpt/{}_final.t7'.format(name)
        os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

        optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=5e-4, nesterov=True)
        milestones = [int(epochs*0.3), int(epochs*0.6), int(epochs*0.8)]
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.2)
        criterion = torch.nn.CrossEntropyLoss()

        for e in range(epochs):
            print('Epoch {}...'.format(e))
            print('Train')
            self.train_epoch(optimizer, criterion)

            top1, _ = self.test(self.test_loader)
            print('Test | Top-1: {:.2f}'.format(top1))
            scheduler.step()

        # Evaluated again after the loop so a result exists even when epochs == 0.
        top1, _ = self.test(self.test_loader)
        torch.save(model, ckpt_path)
        return top1

Example #2

Source File: lstm.py From Gesture-Recognition with MIT License

5 votes

def train(model, optim, criterion, datum, label, states, num_classes):
    """Update the model weights from the loss on a single datapoint.

    The model is called with ``datum`` and the incoming ``states`` and must
    return ``(output, new_states)``.  The output is reshaped to
    ``(1, num_classes)`` before it is scored and passed to ``criterion``.

    Returns:
        A tuple ``(loss_value, detached_states, is_correct)`` where
        ``loss_value`` is a Python number, ``detached_states`` holds the first
        two entries of the new states detached from the autograd graph, and
        ``is_correct`` is whatever ``accuracy`` returned.
    """
    optim.zero_grad()

    raw_output, next_states = model(datum, states)
    prediction = raw_output.view(1, num_classes)
    is_correct = accuracy(prediction, label, num_classes)

    loss = criterion(prediction, label)
    loss.backward()

    # Hand back states that no longer reference this step's graph.
    first_state = next_states[0].detach()
    second_state = next_states[1].detach()
    optim.step()

    return loss.item(), (first_state, second_state), is_correct

Example #3

Source File: popnn_torch.py From Gesture-Recognition with MIT License

5 votes

def train(model, optim, criterion, datum, label):
    """Update the model weights from the loss on a single datapoint.

    The model output is reshaped to ``(1, num_classes)``; ``num_classes`` is
    not a parameter and is read from the enclosing module scope.

    Returns:
        A tuple ``(loss_value, is_correct)`` where ``loss_value`` is a Python
        number and ``is_correct`` is whatever ``accuracy`` returned.
    """
    optim.zero_grad()

    prediction = model(datum).view(1, num_classes)
    is_correct = accuracy(prediction, label)

    loss = criterion(prediction, label)
    loss.backward()
    optim.step()

    return loss.item(), is_correct

Example #4

Source File: orthogonal_layer.py From FrEIA with MIT License

5 votes

def test_param_update(self):
        """Train ``test_net`` to reproduce its input and report W·Wᵀ − I.

        For 2500 steps a random batch is pushed through ``test_net`` and the
        mean squared difference to the input is back-propagated.  Before the
        optimizer step, the gradient G of every parameter whose name contains
        ``'weights'`` is replaced by ``(G Wᵀ − W Gᵀ) W``.  Every 25th step one
        line is printed: the loss, the largest absolute entry of ``W Wᵀ − I``
        and its mean squared entry, for the last such parameter.

        ``optim``, ``test_net`` and ``inp_size`` come from the enclosing scope.
        """
        for step_idx in range(2500):
            optim.zero_grad()

            inputs = torch.randn(self.batch_size, inp_size)
            outputs = test_net(inputs)

            loss = torch.mean((outputs - inputs)**2)
            loss.backward()

            for param_name, param in test_net.named_parameters():
                if 'weights' not in param_name:
                    continue
                grad_wt = torch.mm(param.grad, param.data.t())
                param.grad = torch.mm(grad_wt - grad_wt.t(), param.data)

                # Kept as a reference to the parameter storage, so the values
                # inspected below are the ones after optim.step().
                weights = param.data

            optim.step()

            if step_idx % 25 != 0:
                continue

            print(loss.item(), end='\t')
            residual = torch.mm(weights, weights.t()) - torch.eye(weights.shape[0])
            print(torch.max(torch.abs(residual)).item(), end='\t')
            print(torch.mean(residual**2).item(), end='\t')
            print()

Example #5

Source File: morphnet.py From LeGR with Apache License 2.0

5 votes

def train_epoch(model, optim, criterion, loader, lbda=None, cbns=None, maps=None, constraint=None):
    """Train ``model`` for one pass over ``loader`` and print running Top-1.

    Each batch is moved to ``'cuda'``, scored with ``criterion`` and used for
    one optimizer step.  When ``constraint`` is truthy the loss additionally
    includes ``lbda * regularizer(model, constraint, cbns, maps)`` and
    ``truncate_smallbeta(model, cbns)`` is called after the last batch.

    Args:
        model: network to train (put into training mode here).
        optim: optimizer holding the model parameters.
        criterion: loss callable taking ``(output, label)``.
        loader: sized iterable of ``(batch, label)`` pairs.
        lbda: weight of the regularization term; only used with ``constraint``.
        cbns: forwarded to ``regularizer`` and ``truncate_smallbeta``.
        maps: forwarded to ``regularizer``.
        constraint: enables the regularized branch when truthy.
    """
    model.train()
    total = 0
    top1 = 0
    num_batches = len(loader)
    for i, (batch, label) in enumerate(loader):
        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)

        out = model(batch)
        _, pred = out.max(dim=1)
        # .item() keeps the counter a plain int; accumulating the tensor made
        # the progress line print "tensor(...)" instead of the count.
        top1 += pred.eq(label).sum().item()
        if constraint:
            reg = lbda * regularizer(model, constraint, cbns, maps)
            loss = criterion(out, label) + reg
        else:
            loss = criterion(out, label)
        loss.backward()
        optim.step()

        # Report on every 100th batch and on the final one.
        if (i % 100 == 0) or (i == num_batches-1):
            print('Train | Batch ({}/{}) | Top-1: {:.2f} ({}/{})'.format(
                i+1, num_batches,
                float(top1)/total*100, top1, total))
    if constraint:
        truncate_smallbeta(model, cbns)

Example #6

Source File: morphnet.py From LeGR with Apache License 2.0

5 votes

def train(model, train_loader, val_loader, epochs=10, lr=1e-2, name=''):
    """Train ``model`` on CUDA and keep the best-scoring checkpoint.

    Runs ``train_epoch`` then ``test`` once per epoch.  The learning rate is
    multiplied by 0.2 at 30%, 60% and 80% of ``epochs``.  The whole module is
    written to ``ckpt/<name>_best.t7`` whenever the validation Top-1 exceeds
    the best value seen so far.

    Args:
        model: network to train; moved to ``'cuda'``.
        train_loader: passed to ``train_epoch``.
        val_loader: passed to ``test``.
        epochs: number of epochs.
        lr: initial SGD learning rate.
        name: prefix used for the checkpoint file name.

    Returns:
        The model as it is after the final epoch (not necessarily the state
        stored in the checkpoint).
    """
    import os  # local import: only needed for the checkpoint directory

    model = model.to('cuda')
    model.train()

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4, nesterov=True)
    milestones = [int(epochs*0.3), int(epochs*0.6), int(epochs*0.8)]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.2)
    criterion = torch.nn.CrossEntropyLoss()

    # torch.save() does not create missing parent directories.
    ckpt_path = 'ckpt/{}_best.t7'.format(name)
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    best_top1 = float('-inf')
    for e in range(epochs):
        train_epoch(model, optimizer, criterion, train_loader)
        top1, _ = test(model, val_loader)
        print('Epoch {} | Top-1: {:.2f}'.format(e, top1))
        # The file is named "_best", so only overwrite it on improvement;
        # saving unconditionally left the last epoch there instead.
        if top1 > best_top1:
            best_top1 = top1
            torch.save(model, ckpt_path)
        scheduler.step()
    return model

Example #7

Source File: amc.py From LeGR with Apache License 2.0

5 votes

def train_step(self, policy_loss):
        """Back-propagate ``policy_loss`` and apply one optimizer update.

        Gradients held by ``self.net`` are cleared first, so only this loss
        contributes to the step taken by ``self.optimizer``.
        """
        network = self.net
        network.zero_grad()

        policy_loss.backward()

        updater = self.optimizer
        updater.step()

Example #8

Source File: amc.py From LeGR with Apache License 2.0

5 votes

def train_step(self, state, action, target):
        """Fit ``self.net`` to ``target`` for one ``(state, action)`` input.

        Clears the network gradients, evaluates ``self.net(state, action)``,
        back-propagates ``self.criterion(prediction, target)`` and applies one
        step of ``self.optimizer``.

        Returns:
            The prediction computed before the parameter update.
        """
        network = self.net
        network.zero_grad()

        prediction = network(state, action)
        self.criterion(prediction, target).backward()
        self.optimizer.step()

        return prediction

Example #9

Source File: amc.py From LeGR with Apache License 2.0

5 votes

def train_epoch(self, optim, criterion):
        """Train ``self.model`` for one pass over ``self.train_loader``.

        Each batch is moved to ``'cuda'``, scored with ``criterion`` and used
        for one optimizer step.  On every 100th batch and on the last one a
        line is printed with the running mean loss and the mean per-batch time
        spent waiting for data and running the network.

        Args:
            optim: optimizer holding the model parameters.
            criterion: loss callable taking ``(output, label)``.
        """
        self.model.train()

        data_t = 0      # seconds spent waiting for the loader
        train_t = 0     # seconds spent in transfer/forward/backward/step
        total_loss = 0
        num_batches = len(self.train_loader)
        s = time.time()
        for i, (batch, label) in enumerate(self.train_loader):
            data_t += time.time()-s
            s = time.time()
            optim.zero_grad()
            batch, label = batch.to('cuda'), label.to('cuda')

            out = self.model(batch)
            loss = criterion(out, label)
            loss.backward()
            total_loss += loss.item()
            optim.step()
            train_t += time.time()-s

            if (i % 100 == 0) or (i == num_batches-1):
                print('Batch ({}/{}) | Loss: {:.3f} | (PerBatch) Data: {:.3f}s,  Network: {:.3f}s'.format(
                    i+1, num_batches, total_loss/(i+1), data_t/(i+1), train_t/(i+1)))
            # Restart the clock after printing so logging is not billed as
            # data-loading time.
            s = time.time()

Example #10

Source File: main.py From Text-Classification-Pytorch with MIT License

5 votes

def train_model(model, train_iter, epoch, batch_size=32):
    """Train ``model`` for one epoch with Adam and return mean loss/accuracy.

    Batches whose first dimension differs from ``batch_size`` are skipped.
    Gradients are clipped with ``clip_gradient(model, 1e-1)`` before each
    optimizer step, and a progress line is printed every 100 steps.

    Args:
        model: network to train; moved to CUDA when it is available.
        train_iter: sized iterable of batches exposing ``.text`` and ``.label``.
        epoch: zero-based epoch index, used only in the progress message.
        batch_size: required size of the first dimension of ``batch.text[0]``.

    Returns:
        ``(mean_loss, mean_accuracy)``.  Both sums are divided by
        ``len(train_iter)``, so skipped batches still count in the
        denominator.
    """
    total_epoch_loss = 0
    total_epoch_acc = 0
    # Move the model only when CUDA exists, matching how the data is handled
    # below; an unconditional model.cuda() fails on CPU-only machines.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label.long()
        if use_cuda:
            text = text.cuda()
            target = target.cuda()
        # One of the batches returned by BucketIterator has a different length.
        # Compare by value: "is not" tests object identity, which is not a
        # reliable integer comparison.
        if text.size()[0] != batch_size:
            continue
        optim.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects/len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1

        if steps % 100 == 0:
            print (f'Epoch: {epoch+1}, Idx: {idx+1}, Training Loss: {loss.item():.4f}, Training Accuracy: {acc.item(): .2f}%')

        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()

    return total_epoch_loss/len(train_iter), total_epoch_acc/len(train_iter)

Example #11

Source File: amc.py From LeGR with Apache License 2.0

4 votes

def step(self, action):
        """Apply ``action`` to the current layer and return the next transition.

        ``action`` is forwarded to ``self.filter_pruner.amc_compress`` together
        with the current layer counter and ``self.max_sparsity``.  If that
        advances the counter past the last entry of
        ``filter_pruner.activation_to_conv`` the episode ends: the model is
        pruned, measured, trained for ``self.steps`` steps and evaluated on
        ``self.val_loader``.  Otherwise an 11-element feature vector for the
        next layer is built.

        Returns:
            A tuple ``(state, reward, terminal, info)``:
            ``state`` is ``torch.zeros(1)`` on the terminal step and the
            11-element feature tensor otherwise; ``reward`` is the validation
            top-1 on the terminal step and 0 otherwise; ``terminal`` is 1 or 0;
            ``info`` is ``[full_flops, rest_max_flops, reduced, flops, m_flop,
            m_size]`` (``m_flop``/``m_size`` stay 0 on non-terminal steps).
        """
        # Remembered because it becomes the last feature of the next state.
        self.last_act = action

        self.layer_counter, self.cost, self.rest, rest_max_flops = self.filter_pruner.amc_compress(self.layer_counter, action, self.max_sparsity)

        self.reduced = self.full_flops - self.cost

        # Defaults reported in the info list when the model is not measured.
        m_flop, m_size = 0, 0

        if self.layer_counter >= len(self.filter_pruner.activation_to_conv):
            # Just finish, evaluate reward
            state = torch.zeros(1)

            filters_to_prune_per_layer = self.filter_pruner.get_pruning_plan_from_layer_budget(self.filter_pruner.conv_out_channels)
            prune_targets = self.filter_pruner.pack_pruning_target(filters_to_prune_per_layer, get_segment=True, progressive=True)
            # Count pruned filters per layer.
            # NOTE(review): assumes filter_index is an inclusive (start, end)
            # pair, hence the +1 — confirm against pack_pruning_target.
            layers_pruned = {}
            for layer_index, filter_index in prune_targets:
                if layer_index not in layers_pruned:
                    layers_pruned[layer_index] = 0
                layers_pruned[layer_index] = layers_pruned[layer_index] + (filter_index[1]-filter_index[0]+1)

            # Remaining filters per layer = ranked filters minus pruned ones;
            # only used for the log line below.
            filters_left = {}
            for k in sorted(self.filter_pruner.filter_ranks.keys()):
                if k not in layers_pruned:
                    layers_pruned[k] = 0
                filters_left[k] = len(self.filter_pruner.filter_ranks[k]) - layers_pruned[k]
            print('Filters left: {}'.format(sorted(filters_left.items())))
            print('Prunning filters..')
            for layer_index, filter_index in prune_targets:
                self.filter_pruner.prune_conv_layer_segment(layer_index, filter_index)
            m_flop, m_size = measure_model(self.model, self.filter_pruner, self.image_size)
            # No per-layer FLOPs on the terminal step; placeholder for the info list.
            flops = 0
            print('FLOPs: {:.3f}M | #Params: {:.3f}M'.format(m_flop/1e6, m_size/1e6))
            # Train the pruned model before scoring it on the validation set.
            self.train_steps(self.steps)
            top1, loss = self.test(self.val_loader)
            reward = top1
            terminal = 1
        else:

            flops = self.filter_pruner.get_unit_flops_for_layer(self.layer_counter)

            conv = self.filter_pruner.activation_to_conv[self.layer_counter]
            h = self.filter_pruner.omap_size[self.layer_counter][0]
            w = self.filter_pruner.omap_size[self.layer_counter][1]

            # Features of the next layer, each divided by a stored maximum or
            # by the full FLOPs count, followed by the action just taken.
            state = torch.Tensor([float(self.layer_counter)/len(self.filter_pruner.activation_to_conv),
                    float(self.filter_pruner.conv_out_channels[self.layer_counter])/self.max_oc,
                    float(self.filter_pruner.conv_in_channels[self.layer_counter])/self.max_ic,
                    float(h)/self.max_fh,
                    float(w)/self.max_fw,
                    float(conv.stride[0])/self.max_stride,
                    float(conv.weight.size(2))/self.max_k,
                    float(flops) /self.full_flops,
                    float(self.reduced)/self.full_flops,
                    float(self.rest)/self.full_flops,
                    self.last_act])

            reward = 0
            terminal = 0

        return state, reward, terminal, [self.full_flops, rest_max_flops, self.reduced, flops, m_flop, m_size]