Python mxnet.autograd.record() Examples

The following are 30 code examples of mxnet.autograd.record(), collected from open-source projects. The project and source file for each example are listed above it. You may also want to check out all available functions and classes of the module mxnet.autograd.
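As a minimal standalone sketch (not taken from any of the projects below; the variable names are illustrative only), the typical pattern is to attach a gradient buffer to the input array, run the forward pass inside autograd.record(), and then call backward() on the result:

import mxnet as mx
from mxnet import autograd

x = mx.nd.array([[1.0, 2.0], [3.0, 4.0]])
x.attach_grad()                  # allocate storage for the gradient of x
with autograd.record():          # record the forward computation graph
    y = (2 * x * x).sum()        # any differentiable expression
y.backward()                     # fill x.grad with dy/dx = 4 * x
print(x.grad)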
Example #1
Source File: train_gl_seg.py    From imgclsmob with MIT License
def training(self, epoch):
        tbar = tqdm(self.train_data)
        train_loss = 0.0
        for i, (data, target) in enumerate(tbar):
            with autograd.record(True):
                outputs = self.net(data.astype(args.dtype, copy=False))
                losses = self.criterion(outputs, target)
                mx.nd.waitall()
                autograd.backward(losses)
            self.optimizer.step(self.args.batch_size)
            for loss in losses:
                train_loss += loss.asnumpy()[0] / len(losses)
            tbar.set_description('Epoch {}, training loss {}'.format(epoch, train_loss / (i + 1)))
            mx.nd.waitall()

        # save every epoch
        save_checkpoint(self.net.module, self.args, False) 
Example #2
Source File: train.py    From panoptic-fpn-gluon with Apache License 2.0
def training(self, epoch):
        tbar = tqdm(self.train_data)
        train_loss = 0.0
        alpha = 0.2
        for i, (data, target) in enumerate(tbar):
            with autograd.record(True):
                outputs = self.net(data.astype(args.dtype, copy=False))
                losses = self.criterion(outputs, target)
                mx.nd.waitall()
                autograd.backward(losses)
            self.optimizer.step(self.args.batch_size)
            for loss in losses:
                train_loss += loss.asnumpy()[0] / len(losses)
            tbar.set_description('Epoch %d, training loss %.3f'%\
                (epoch, train_loss/(i+1)))
            mx.nd.waitall()

        # save every epoch
        save_checkpoint(self.net.module, self.args, False) 
Example #3
Source File: main.py    From CapsuleNet-Gluon with MIT License
def train(net,epochs, ctx, train_data,test_data,
            margin_loss, reconstructions_loss, 
            batch_size,scale_factor):
    num_classes = 10
    trainer = gluon.Trainer(
        net.collect_params(),'sgd', {'learning_rate': 0.05, 'wd': 5e-4})

    for epoch in range(epochs):
        train_loss = 0.0
        for batch_idx, (data, label) in tqdm(enumerate(train_data), total=len(train_data), ncols=70, leave=False, unit='b'):
            label = label.as_in_context(ctx)
            data = data.as_in_context(ctx)
            with autograd.record():
                prob, X_l2norm, reconstructions = net(data, label)
                loss1 = margin_loss(data, num_classes,  label, X_l2norm)
                loss2 = reconstructions_loss(reconstructions, data)
                loss = loss1 + scale_factor * loss2
                loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
        test_acc = test(test_data, net, ctx)
        print('Epoch:{}, TrainLoss:{:.5f}, TestAcc:{}'.format(epoch,train_loss / len(train_data),test_acc)) 
Example #4
Source File: train.py    From gluon-cv with Apache License 2.0
def training(self, epoch):
        tbar = tqdm(self.train_data)
        train_loss = 0.0
        for i, (data, target) in enumerate(tbar):
            with autograd.record(True):
                outputs = self.net(data.astype(args.dtype, copy=False))
                losses = self.criterion(outputs, target)
                mx.nd.waitall()
                autograd.backward(losses)
            self.optimizer.step(self.args.batch_size)
            for loss in losses:
                train_loss += np.mean(loss.asnumpy()) / len(losses)
            tbar.set_description('Epoch %d, training loss %.3f' % \
                (epoch, train_loss/(i+1)))
            if i != 0 and i % self.args.log_interval == 0:
                self.logger.info('Epoch %d iteration %04d/%04d: training loss %.3f' % \
                    (epoch, i, len(self.train_data), train_loss/(i+1)))
            mx.nd.waitall()

        # save every epoch
        if self.args.no_val:
            save_checkpoint(self.net.module, self.args, epoch, 0, False) 
Example #5
Source File: sagemaker_predictive_maintenance_entry_point.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def train(net, train_dataloader, epochs, batch_size, is_many_to_one, model_dir):
    loss_fn = RMSE_many_to_one if is_many_to_one else RMSE_many_to_many
    INPUT_SCALER = 300

    for e in range(epochs):
        loss_avg = 0
        for i, ((data, data_lengths), label) in enumerate(train_dataloader):
            data = data.as_in_context(ctx).astype('float32')
            label = label.as_in_context(ctx).astype('float32')
            data_lengths = data_lengths.as_in_context(ctx).astype('float32')
            with autograd.record():
                pred = net(data)
                loss, loss_no_weight = loss_fn(pred, label / INPUT_SCALER, data_lengths)
                loss = loss.mean()
            loss.backward()
            trainer.step(data.shape[0])
            loss_avg += loss_no_weight.mean().sqrt().asnumpy()
        logging.info("Epoch {}: Average RMSE {}".format(e, INPUT_SCALER * loss_avg / (i + 1)))
    
    save_model(net, model_dir)
    logging.info("Saved model params")
    logging.info("End of training") 
Example #6
Source File: test_operator_gpu.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_softmax_activation():
    gpu_a = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
        [2., -.4, 7.,   3., 0.2]], ctx=mx.gpu(0))
    cpu_a = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
        [2., -.4, 7.,   3., 0.2]], ctx=mx.cpu())

    cpu_a.attach_grad()
    gpu_a.attach_grad()
    with mx.autograd.record():
        gpu_y = mx.nd.SoftmaxActivation(data = gpu_a)
        cpu_y = mx.nd.SoftmaxActivation(data = cpu_a)
        assert_almost_equal(cpu_y.asnumpy(), gpu_y.asnumpy(), atol = 1e-3, rtol = 1e-3)

        gpu_y.backward()
        cpu_y.backward()
        assert_almost_equal(cpu_a.grad.asnumpy(), gpu_a.grad.asnumpy(),
                atol = 1e-3, rtol = 1e-3) 
Example #7
Source File: test_operator_gpu.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_batchnorm_backwards_notrain():
    for ctx in [mx.cpu(0), mx.gpu(0)]:
        for cudnn_o in [False, True]:
            B,C,H,W = 4,3,2,2
            x = mx.nd.random.poisson(1,shape=(B,C,H,W)).as_in_context(ctx)
            gamma = mx.nd.random.normal(shape=(C)).as_in_context(ctx)
            beta = mx.nd.random.normal(shape=(C)).as_in_context(ctx)
            mean = mx.nd.random.normal(shape=(C)).as_in_context(ctx)
            std = mx.nd.random.normal(shape=(C)).as_in_context(ctx)
            x.attach_grad()

            with autograd.record(False):
                y = mx.ndarray.BatchNorm(x, gamma, beta, mean, std.square(),
                                         fix_gamma=False, cudnn_off=cudnn_o)
                loss=y.square().sum()
            loss.backward(train_mode=False) 
Example #8
Source File: test_operator_gpu.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_cross_device_autograd():
    x = mx.nd.random.uniform(shape=(10,))
    x.attach_grad()

    with mx.autograd.record():
        y = mx.nd.tanh(x)
        y = y.copyto(mx.gpu(0))
        y = mx.nd.tanh(y)
        y = y.copyto(mx.cpu(0))
        y = mx.nd.tanh(y)
        y = y.copyto(mx.gpu(0))
        y = y.copyto(mx.gpu(0))

        y.backward()

    dx = x.grad.asnumpy()
    x.grad[:] = 0

    with mx.autograd.record():
        y = x
        for i in range(3):
            y = mx.nd.tanh(y)
        y.backward()

    assert_almost_equal(dx, x.grad.asnumpy()) 
Example #9
Source File: test_autograd.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def train(net, epoch, ctx_list):
    net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx_list)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for i in range(epoch):
        train_data.reset()
        for batch in train_data:
            datas = gluon.utils.split_and_load(batch.data[0], ctx_list, batch_axis=0)
            labels = gluon.utils.split_and_load(batch.label[0], ctx_list, batch_axis=0)
            outputs = []
            with autograd.record():
                for x, y in zip(datas, labels):
                    z = net(x)
                    L = loss(z, y)
                    L.backward()
                    outputs.append(z)
            trainer.step(batch.data[0].shape[0])
            metric.update(labels, outputs)
        name, acc = metric.get()
        metric.reset()
        print('training acc at epoch %d: %s=%f'%(i, name, acc)) 
Example #10
Source File: train.py    From cascade_rcnn_gluon with Apache License 2.0
def training(self, epoch):
        tbar = tqdm(self.train_data)
        train_loss = 0.0
        for i, (data, target) in enumerate(tbar):
            self.lr_scheduler.update(i, epoch)
            with autograd.record(True):
                outputs = self.net(data)
                losses = self.criterion(outputs, target)
                mx.nd.waitall()
                autograd.backward(losses)
            self.optimizer.step(self.args.batch_size)
            for loss in losses:
                train_loss += loss.asnumpy()[0] / len(losses)
            tbar.set_description('Epoch %d, training loss %.3f'%\
                (epoch, train_loss/(i+1)))
            mx.nd.waitall()

        # save every epoch
        save_checkpoint(self.net.module, self.args, False) 
Example #11
Source File: utils.py    From d2l-zh with Apache License 2.0
def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):
    """Train and evaluate a model."""
    print('training on', ctx)
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, m, start = 0.0, 0.0, 0, 0, time.time()
        for i, batch in enumerate(train_iter):
            Xs, ys, batch_size = _get_batch(batch, ctx)   
            with autograd.record():
                y_hats = [net(X) for X in Xs]
                ls = [loss(y_hat, y) for y_hat, y in zip(y_hats, ys)]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
            train_l_sum += sum([l.sum().asscalar() for l in ls])
            n += sum([l.size for l in ls])
            train_acc_sum += sum([(y_hat.argmax(axis=1) == y).sum().asscalar()
                                 for y_hat, y in zip(y_hats, ys)])
            m += sum([y.size for y in ys])
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / m, test_acc,
                 time.time() - start)) 
Example #12
Source File: kaggle_k_fold_cross_validation.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def train(net, X_train, y_train, epochs, verbose_epoch, learning_rate,
          weight_decay, batch_size):
    """Trains the model."""
    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size,
                                            shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': learning_rate,
                             'wd': weight_decay})
    net.initialize(force_reinit=True)
    for epoch in range(epochs):
        for data, label in data_iter_train:
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            avg_loss = get_rmse_log(net, X_train, y_train)
        if epoch > verbose_epoch:
            print("Epoch %d, train loss: %f" % (epoch, avg_loss))
    return avg_loss 
Example #13
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    """Train and evaluate a model with CPU."""
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc)) 
Example #14
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs):
    """Train and evaluate a model with CPU or GPU."""
    print('training on', ctx)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X, y = X.as_in_context(ctx), y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start)) 
Example #15
Source File: utils.py    From d2l-zh with Apache License 2.0
def train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs):
    """Train and evaluate a model."""
    print('training on', ctx)
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, m, start = 0.0, 0.0, 0, 0, time.time()
        for i, batch in enumerate(train_iter):
            Xs, ys, batch_size = _get_batch(batch, ctx)   
            with autograd.record():
                y_hats = [net(X) for X in Xs]
                ls = [loss(y_hat, y) for y_hat, y in zip(y_hats, ys)]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
            train_l_sum += sum([l.sum().asscalar() for l in ls])
            n += sum([l.size for l in ls])
            train_acc_sum += sum([(y_hat.argmax(axis=1) == y).sum().asscalar()
                                 for y_hat, y in zip(y_hats, ys)])
            m += sum([y.size for y in ys])
        test_acc = evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / m, test_acc,
                 time.time() - start)) 
Example #16
Source File: train_gpu.py    From MXNet-Deep-Learning-in-Action with Apache License 2.0
def train(args, train_data, val_data, net, ctx):
    trainer = gluon.Trainer(params=net.collect_params(),
                            optimizer='sgd',
                            optimizer_params={'learning_rate':0.05})
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    epoch_index = 0
    for epoch in range(args.num_epoch):
        train_loss = 0.0
        train_acc = 0.0
        val_acc = 0.0
        tic = time()
        for data, label in train_data:
            data_part = gluon.utils.split_and_load(data, ctx)
            label_part = gluon.utils.split_and_load(label, ctx)
            with autograd.record():
                losses = [softmax_cross_entropy(net(data), label) for data, label in zip(data_part, label_part)]
            for loss_i in losses:
                loss_i.backward()
                train_loss += loss_i.mean().asscalar()
            trainer.step(args.batch_size)
            train_acc += sum([acc(net(data), label) for data, label in zip(data_part, label_part)])
        for data, label in val_data:
            data_part = gluon.utils.split_and_load(data, ctx)
            label_part = gluon.utils.split_and_load(label, ctx)
            val_acc += sum([acc(net(data), label) for data, label in zip(data_part, label_part)])
        epoch_index += 1
        if epoch_index % args.save_step == 0:
            net.save_parameters("{}-{}.params".format(args.save_prefix, epoch))
            print("save model to {}-{}.params".format(args.save_prefix, epoch))
        print("Epoch {}: Loss {:.4f}, Train accuracy {:.4f}, \
               Val accuracy {:.4f}, Time {:.4f}sec".format(epoch,
                                                   train_loss/len(train_data),
                                                   train_acc/(len(train_data)*len(ctx)),
                                                   val_acc/(len(val_data)*len(ctx)),
                                                   time()-tic)) 
Example #17
Source File: train.py    From ST-MetaNet with MIT License
def process_data(self, epoch, dataloader, metrics=None, is_training=True, title='[TRAIN]'):
		speedometer = Speedometer(title, epoch, frequent=50)
		speedometer.reset()
		if metrics is not None:
			for metric in metrics:
				metric.reset()
		
		for nbatch, batch_data in enumerate(dataloader):
			inputs = [gluon.utils.split_and_load(x, self.ctx) for x in batch_data]
			if is_training:
				self.net.decoder.global_steps += 1.0

				with autograd.record():
					outputs = [self.net(*x, is_training) for x in zip(*inputs)]

				for out in outputs:
					out[0].backward()

				self.step(batch_data[0].shape[0])
			else: outputs = [self.net(*x, False) for x in zip(*inputs)]

			if metrics is not None:
				for metric in metrics:
					for out in outputs:
						metric.update(*out[1])
			speedometer.log_metrics(nbatch + 1, metrics)

		speedometer.finish(metrics) 
Example #18
Source File: train_cpu.py    From MXNet-Deep-Learning-in-Action with Apache License 2.0
def train(args, train_data, val_data, net):
    trainer = gluon.Trainer(params=net.collect_params(),
                            optimizer='sgd',
                            optimizer_params={'learning_rate': 0.05})
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    epoch_index = 0
    for epoch in range(args.num_epoch):
        train_loss = 0.0
        train_acc = 0.0
        val_acc = 0.0
        tic = time()
        for data, label in train_data:
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            train_loss += loss.mean().asscalar()
            trainer.step(args.batch_size)
            train_acc += acc(output, label)
        for data, label in val_data:
            val_acc += acc(net(data), label)
        epoch_index += 1
        if epoch_index % args.save_step == 0:
            net.save_parameters("{}-{}.params".format(args.save_prefix, epoch))
            print("save model to {}-{}.params".format(args.save_prefix, epoch))
        print("Epoch {}: Loss {:.4f}, Train accuracy {:.4f}, \
               Val accuracy {:.4f}, Time {:.4f}sec".format(epoch,
                                                   train_loss/len(train_data),
                                                   train_acc/(len(train_data)),
                                                   val_acc/(len(val_data)),
                                                   time()-tic)) 
Example #19
Source File: test_utils_parallel.py    From cascade_rcnn_gluon with Apache License 2.0
def test_data_parallel():
    # test gluon.contrib.parallel.DataParallelModel
    net = nn.HybridSequential()
    with net.name_scope():
        net.add(nn.Conv2D(in_channels=1, channels=20, kernel_size=5))
        net.add(nn.Activation('relu'))
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Conv2D(in_channels=20, channels=50, kernel_size=5))
        net.add(nn.Activation('relu'))
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        # The Flatten layer collapses all axes, except the first one, into one axis.
        net.add(nn.Flatten())
        net.add(nn.Dense(512,in_units=800))
        net.add(nn.Activation('relu'))
        net.add(nn.Dense(10, in_units=512))

    net.collect_params().initialize()
    criterion = gluon.loss.SoftmaxCELoss(axis=1)

    def test_net_sync(net, criterion, sync, nDevices):
        ctx_list = [mx.cpu(0) for i in range(nDevices)]
        net = DataParallelModel(net, ctx_list, sync=sync)
        criterion = DataParallelCriterion(criterion, ctx_list, sync=sync)
        iters = 100
        # train mode
        for i in range(iters):
            x = mx.random.uniform(shape=(8, 1, 28, 28))
            t = nd.ones(shape=(8))
            with autograd.record():
                y = net(x)
                loss = criterion(y, t)
                autograd.backward(loss)
        # evaluation mode
        for i in range(iters):
            x = mx.random.uniform(shape=(8, 1, 28, 28))
            y = net(x)

    test_net_sync(net, criterion, True, 1)
    test_net_sync(net, criterion, True, 2)
    test_net_sync(net, criterion, False, 1)
    test_net_sync(net, criterion, False, 2) 
Example #20
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    """Train an Gluon RNN model and predict the next item in the sequence."""
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            params = [p.data() for p in model.collect_params().values()]
            grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx)) 
Example #21
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_gluon_ch7(trainer_name, trainer_hyperparams, features, labels,
                    batch_size=10, num_epochs=2):
    """Train a linear regression model with a given Gluon trainer."""
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()

    def eval_loss():
        return loss(net(features), labels).mean().asscalar()

    ls = [eval_loss()]
    data_iter = gdata.DataLoader(
        gdata.ArrayDataset(features, labels), batch_size, shuffle=True)
    trainer = gluon.Trainer(net.collect_params(),
                            trainer_name, trainer_hyperparams)
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    set_figsize()
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss') 
Example #22
Source File: train_script.py    From gluon-face with MIT License
def train():
    train_net.collect_params().reset_ctx(ctx)
    trainer = gluon.Trainer(train_net.collect_params(), optimizer, optimizer_params)

    metric = mx.metric.Accuracy()
    train_loss = mx.metric.Loss()

    metric.reset()
    train_loss.reset()
    sample_time = time.time()
    for iteration in range(1, int(num_iterations + 1)):
        Loss = SML if iteration < loss_warmup_iters else AFL
        batch = next(batch_generator)
        trans = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        labels = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)

        with autograd.record():
            outputs = [train_net(X.astype(dtype, copy=False))[1] for X in trans]
            losses = [Loss(yhat, y.astype(dtype, copy=False)) for yhat, y in zip(outputs, labels)]
        for loss in losses:
            loss.backward()
        trainer.step(batch_size)

        train_loss.update(0, losses)
        metric.update(labels, outputs)
        if iteration % opt.cat_interval == 0:
            num_samples = (opt.cat_interval * batch_size) // (time.time() - sample_time)
            _, train_acc = metric.get()
            _, epoch_loss = train_loss.get()
            metric.reset()
            train_loss.reset()
            epoch_str = ("Iter %d. Loss: %.5f, Train acc %f, %d samples/s."
                         % (iteration, epoch_loss, train_acc, num_samples))
            logger.info(epoch_str + 'lr ' + str(trainer.learning_rate))
            train_net.save_parameters("%s/%s-it-%d.params" % (opt.save_dir, opt.model, iteration))
            trainer.save_states("%s/%s-it-%d.states" % (opt.save_dir, opt.model, iteration))
            results = validate()
            for result in results:
                logger.info('{}'.format(result))
            sample_time = time.time() 
Example #23
Source File: base_test.py    From STGCN with GNU General Public License v3.0
def test_stgcn(self):

        from model import base_model

        ctx = mx.gpu(1)
        num_of_vertices = 228
        batch_size = 8
        cheb_polys = nd.random_uniform(shape=(num_of_vertices,
                                              num_of_vertices * 3),
                                       ctx=ctx)
        blocks = [[1, 32, 64], [64, 32, 128]]
        x = nd.random_uniform(shape=(batch_size, 1, 12, num_of_vertices),
                              ctx=ctx)
        y = nd.random_uniform(shape=(batch_size, 1, 1, num_of_vertices),
                              ctx=ctx)

        net = base_model.STGCN(12, 3, 3, blocks, 1.0, cheb_polys)
        net.initialize(ctx=ctx)
        self.assertEqual((batch_size, 1, 1, num_of_vertices), net(x).shape)

        trainer = gluon.Trainer(net.collect_params(), 'adam')
        trainer.set_learning_rate(1e-3)
        loss = gluon.loss.L2Loss()

        for i in range(5):
            with autograd.record():
                l = loss(net(x), y)
            l.backward()
            trainer.step(batch_size)
            self.assertIsInstance(l.mean().asscalar().item(), float)
            print(l.mean().asscalar()) 
Example #24
Source File: test_utils_parallel.py    From panoptic-fpn-gluon with Apache License 2.0
def test_data_parallel():
    # test gluon.contrib.parallel.DataParallelModel
    net = nn.HybridSequential()
    with net.name_scope():
        net.add(nn.Conv2D(in_channels=1, channels=20, kernel_size=5))
        net.add(nn.Activation('relu'))
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        net.add(nn.Conv2D(in_channels=20, channels=50, kernel_size=5))
        net.add(nn.Activation('relu'))
        net.add(nn.MaxPool2D(pool_size=2, strides=2))
        # The Flatten layer collapses all axes, except the first one, into one axis.
        net.add(nn.Flatten())
        net.add(nn.Dense(512,in_units=800))
        net.add(nn.Activation('relu'))
        net.add(nn.Dense(10, in_units=512))

    net.collect_params().initialize()
    criterion = gluon.loss.SoftmaxCELoss(axis=1)

    def test_net_sync(net, criterion, sync, nDevices):
        ctx_list = [mx.cpu(0) for i in range(nDevices)]
        net = DataParallelModel(net, ctx_list, sync=sync)
        criterion = DataParallelCriterion(criterion, ctx_list, sync=sync)
        iters = 100
        # train mode
        for i in range(iters):
            x = mx.random.uniform(shape=(8, 1, 28, 28))
            t = nd.ones(shape=(8))
            with autograd.record():
                y = net(x)
                loss = criterion(y, t)
                autograd.backward(loss)
        # evaluation mode
        for i in range(iters):
            x = mx.random.uniform(shape=(8, 1, 28, 28))
            y = net(x)

    test_net_sync(net, criterion, True, 1)
    test_net_sync(net, criterion, True, 2)
    test_net_sync(net, criterion, False, 1)
    test_net_sync(net, criterion, False, 2) 
Example #25
Source File: super_resolution.py    From training_results_v0.6 with Apache License 2.0
def train(epoch, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    net.initialize(mx.init.Orthogonal(), ctx=ctx)
    # re-initialize conv4's weight to be Orthogonal
    net.conv4.initialize(mx.init.Orthogonal(scale=1), force_reinit=True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': opt.lr})
    loss = gluon.loss.L2Loss()

    for i in range(epoch):
        train_data.reset()
        for batch in train_data:
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            with ag.record():
                for x, y in zip(data, label):
                    z = net(x)
                    L = loss(z, y)
                    L.backward()
                    outputs.append(z)
            trainer.step(batch.data[0].shape[0])
            metric.update(label, outputs)

        name, acc = metric.get()
        metric.reset()
        print('training mse at epoch %d: %s=%f'%(i, name, acc))
        test(ctx)

    net.save_parameters('superres.params') 
Example #26
Source File: mnist.py    From training_results_v0.6 with Apache License 2.0
def train(epochs, ctx):
    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    # Trainer is for updating parameters with gradient.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': opt.lr, 'momentum': opt.momentum})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for epoch in range(epochs):
        # reset data iterator and metric at beginning of epoch.
        metric.reset()
        for i, (data, label) in enumerate(train_data):
            # Copy data to ctx if necessary
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Start recording computation graph with record() section.
            # Recorded graphs can then be differentiated with backward.
            with autograd.record():
                output = net(data)
                L = loss(output, label)
                L.backward()
            # take a gradient step with batch_size equal to data.shape[0]
            trainer.step(data.shape[0])
            # update metric at last.
            metric.update([label], [output])

            if i % opt.log_interval == 0 and i > 0:
                name, acc = metric.get()
                print('[Epoch %d Batch %d] Training: %s=%f'%(epoch, i, name, acc))

        name, acc = metric.get()
        print('[Epoch %d] Training: %s=%f'%(epoch, name, acc))

        name, val_acc = test(ctx)
        print('[Epoch %d] Validation: %s=%f'%(epoch, name, val_acc))

    net.save_parameters('mnist.params') 
Example #27
Source File: mnist_gluon.py    From sagemaker-python-sdk with Apache License 2.0
def train_model(net, epochs, ctx, learning_rate, momentum, train_data, val_data):
    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    # Trainer is for updating parameters with gradient.
    trainer = gluon.Trainer(
        net.collect_params(), "sgd", {"learning_rate": learning_rate, "momentum": momentum}
    )
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for epoch in range(epochs):
        # reset data iterator and metric at beginning of epoch.
        metric.reset()
        for i, (data, label) in enumerate(train_data):
            # Copy data to ctx if necessary
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Start recording computation graph with record() section.
            # Recorded graphs can then be differentiated with backward.
            with autograd.record():
                output = net(data)
                L = loss(output, label)
                L.backward()
            # take a gradient step with batch_size equal to data.shape[0]
            trainer.step(data.shape[0])
            # update metric at last.
            metric.update([label], [output])

            if i % 100 == 0 and i > 0:
                name, acc = metric.get()
                print("[Epoch %d Batch %d] Training: %s=%f" % (epoch, i, name, acc))

        name, acc = metric.get()
        print("[Epoch %d] Training: %s=%f" % (epoch, name, acc))
        name, val_acc = test(ctx, net, val_data)
        print("[Epoch %d] Validation: %s=%f" % (epoch, name, val_acc)) 
Example #28
Source File: hybrid_test.py    From STGCN with GNU General Public License v3.0
def test_stgcn(self):

        from model import hybrid_model

        ctx = mx.gpu(1)
        num_of_vertices = 228
        batch_size = 8
        cheb_polys = nd.random_uniform(shape=(num_of_vertices,
                                              num_of_vertices * 3),
                                       ctx=ctx)
        blocks = [[1, 32, 64], [64, 32, 128]]
        x = nd.random_uniform(shape=(batch_size, 1, 12, num_of_vertices),
                              ctx=ctx)
        y = nd.random_uniform(shape=(batch_size, 1, 1, num_of_vertices),
                              ctx=ctx)

        net = hybrid_model.STGCN(12, 3, 3, blocks, 1.0,
                                 num_of_vertices, cheb_polys)
        net.initialize(ctx=ctx)
        net.hybridize()
        self.assertEqual((batch_size, 1, 1, num_of_vertices), net(x).shape)

        trainer = gluon.Trainer(net.collect_params(), 'adam')
        trainer.set_learning_rate(1e-3)
        loss = gluon.loss.L2Loss()

        for i in range(5):
            with autograd.record():
                l = loss(net(x), y)
            l.backward()
            trainer.step(batch_size)
            self.assertIsInstance(l.mean().asscalar().item(), float)
            print('[Test]: {}'.format(l.mean().asscalar())) 
Example #29
Source File: hybrid_test.py    From STGCN with GNU General Public License v3.0
def test_stblock(self):

        from model.hybrid_layers import St_conv_block, Output_layer

        num_of_vertices = 228
        cheb_polys = nd.random_uniform(shape=(num_of_vertices,
                                              num_of_vertices * 3))
        net = gluon.nn.Sequential()
        net.add(
            St_conv_block(3, 3, [1, 32, 64], num_of_vertices,
                          1.0, 12, cheb_polys),
            St_conv_block(3, 3, [64, 32, 128], num_of_vertices,
                          1.0, 8, cheb_polys),
            Output_layer(128, 4, num_of_vertices)
        )
        net.initialize()
        net.hybridize()

        x = nd.random_uniform(shape=(8, 1, 12, num_of_vertices))
        y = nd.random_uniform(shape=net(x).shape)

        trainer = gluon.Trainer(net.collect_params(), 'adam')
        trainer.set_learning_rate(1e-3)
        loss = gluon.loss.L2Loss()
        with autograd.record():
            l = loss(net(x), y)
        l.backward()
        trainer.step(8)
        self.assertEqual((8, 1, 1, num_of_vertices), net(x).shape)
        self.assertIsInstance(l.mean().asscalar().item(), float) 
Example #30
Source File: cifar10_dist.py    From training_results_v0.6 with Apache License 2.0
def forward_backward(net, data, label):

    # Ask autograd to remember the forward pass
    with autograd.record():
        # Compute the loss on all GPUs
        losses = [loss(net(X), Y) for X, Y in zip(data, label)]

    # Run the backward pass (calculate gradients) on all GPUs
    for l in losses:
        l.backward()

# Train a batch using multiple GPUs