Python mxnet.gluon.Trainer() Examples

The following are 30 code examples of mxnet.gluon.Trainer(), each taken from an open-source project; the source file and project are noted above each example. You may also want to check out all available functions/classes of the module mxnet.gluon, or try the search function.
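Before the examples, here is a minimal sketch of the typical Trainer workflow they all share: construct a Trainer over a block's parameters, record the forward pass, call backward(), then step(). The one-layer network and toy data below are made up purely for illustration.

import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import nn

# Hypothetical one-layer network and toy data, for illustration only.
net = nn.Dense(1)
net.initialize()
X = mx.nd.random.uniform(shape=(8, 4))
y = mx.nd.random.uniform(shape=(8, 1))

# A Trainer applies the chosen optimizer to the given parameters.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
loss_fn = gluon.loss.L2Loss()

with autograd.record():               # record the graph for differentiation
    loss = loss_fn(net(X), y)
loss.backward()                       # compute gradients
trainer.step(X.shape[0])              # normalize grads by batch size and update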
Example #1
Source File: test_autograd.py    From SNIPER-mxnet with Apache License 2.0
def train(net, epoch, ctx_list):
    net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx_list)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for i in range(epoch):
        train_data.reset()
        for batch in train_data:
            datas = gluon.utils.split_and_load(batch.data[0], ctx_list, batch_axis=0)
            labels = gluon.utils.split_and_load(batch.label[0], ctx_list, batch_axis=0)
            outputs = []
            with autograd.record():
                for x, y in zip(datas, labels):
                    z = net(x)
                    L = loss(z, y)
                    L.backward()
                    outputs.append(z)
            trainer.step(batch.data[0].shape[0])
            metric.update(labels, outputs)
        name, acc = metric.get()
        metric.reset()
        print('training acc at epoch %d: %s=%f'%(i, name, acc)) 
Example #2
Source File: train_gl.py    From imgclsmob with MIT License
def save_params(file_stem,
                net,
                trainer):
    """
    Save current model/trainer parameters.

    Parameters:
    ----------
    file_stem : str
        File stem (with path).
    net : HybridBlock
        Model.
    trainer : Trainer
        Trainer.
    """
    net.save_parameters(file_stem + ".params")
    trainer.save_states(file_stem + ".states") 
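For completeness, a matching restore step might look like the sketch below. This load_params helper is hypothetical (not from the original file) and assumes the same file_stem naming convention used by save_params above; load_parameters and load_states are the standard Gluon counterparts of the two save calls.

def load_params(file_stem, net, trainer, ctx):
    """Restore model/trainer state saved by save_params (illustrative sketch)."""
    net.load_parameters(file_stem + ".params", ctx=ctx)
    trainer.load_states(file_stem + ".states")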
Example #3
Source File: kaggle_k_fold_cross_validation.py    From training_results_v0.6 with Apache License 2.0
def train(net, X_train, y_train, epochs, verbose_epoch, learning_rate,
          weight_decay, batch_size):
    """Trains the model."""
    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size,
                                            shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': learning_rate,
                             'wd': weight_decay})
    net.initialize(force_reinit=True)
    for epoch in range(epochs):
        for data, label in data_iter_train:
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            avg_loss = get_rmse_log(net, X_train, y_train)
        if epoch > verbose_epoch:
            print("Epoch %d, train loss: %f" % (epoch, avg_loss))
    return avg_loss 
Example #4
Source File: architecture.py    From coach with Apache License 2.0
def set_session(self, sess) -> None:
        """
        Initializes the model parameters and creates the model trainer.
        NOTE: Session for the mxnet backend must be None.
        :param sess: must be None
        """
        assert sess is None
        # FIXME Add initializer
        self.model.collect_params().initialize(ctx=self._devices)
        # Hybridize model and losses
        self.model.hybridize()
        for l in self.losses:
            l.hybridize()

        # Pass dummy data with correct shape to trigger shape inference and full parameter initialization
        self.model(*self._dummy_model_inputs())

        if self.network_is_trainable:
            self.trainer = gluon.Trainer(
                self.model.collect_params(), optimizer=self.optimizer, update_on_kvstore=False) 
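A note on update_on_kvstore=False in the example above: it allows gradients to be aggregated and applied in two explicit phases, via trainer.allreduce_grads() followed by trainer.update(), instead of a single step(). A minimal sketch of that split workflow (toy network and data are assumptions, not from the original file):

import mxnet as mx
from mxnet import autograd, gluon
from mxnet.gluon import nn

net = nn.Dense(1)                      # toy network, for illustration
net.initialize()
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.1}, update_on_kvstore=False)

X = mx.nd.ones((4, 2))
with autograd.record():
    loss = net(X).sum()
loss.backward()

trainer.allreduce_grads()   # sum gradients across devices (no-op on one device)
# ... gradients could be inspected or clipped here ...
trainer.update(4)           # apply the optimizer update, normalizing by batch size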
Example #5
Source File: training_sda.py    From d-SNE with Apache License 2.0
def create_trainer(self, inference):
        """
        Create trainer
        :param inference: network
        :return: trainer
        """

        if self.args.optim == 'sgd':
            optim_params = {'learning_rate': self.args.lr, 'wd': self.args.wd, 'momentum': self.args.mom}
        elif self.args.optim == 'adam':
            optim_params = {'learning_rate': self.args.lr, 'wd': self.args.wd}
        else:
            raise NotImplementedError

        trainer = Trainer(inference.collect_params(), optimizer=self.args.optim,
                          optimizer_params=optim_params)
        return trainer 
Example #6
Source File: kaggle_k_fold_cross_validation.py    From SNIPER-mxnet with Apache License 2.0
def train(net, X_train, y_train, epochs, verbose_epoch, learning_rate,
          weight_decay, batch_size):
    """Trains the model."""
    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size,
                                            shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': learning_rate,
                             'wd': weight_decay})
    net.initialize(force_reinit=True)
    for epoch in range(epochs):
        for data, label in data_iter_train:
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            avg_loss = get_rmse_log(net, X_train, y_train)
        if epoch > verbose_epoch:
            print("Epoch %d, train loss: %f" % (epoch, avg_loss))
    return avg_loss 
Example #7
Source File: main.py    From CapsuleNet-Gluon with MIT License
def train(net, epochs, ctx, train_data, test_data,
          margin_loss, reconstructions_loss,
          batch_size, scale_factor):
    num_classes = 10
    trainer = gluon.Trainer(
        net.collect_params(),'sgd', {'learning_rate': 0.05, 'wd': 5e-4})

    for epoch in range(epochs):
        train_loss = 0.0
        for batch_idx, (data, label) in tqdm(enumerate(train_data), total=len(train_data), ncols=70, leave=False, unit='b'):
            label = label.as_in_context(ctx)
            data = data.as_in_context(ctx)
            with autograd.record():
                prob, X_l2norm, reconstructions = net(data, label)
                loss1 = margin_loss(data, num_classes, label, X_l2norm)
                loss2 = reconstructions_loss(reconstructions, data)
                loss = loss1 + scale_factor * loss2
                loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
        test_acc = test(test_data, net, ctx)
        print('Epoch:{}, TrainLoss:{:.5f}, TestAcc:{}'.format(
            epoch, train_loss / len(train_data), test_acc))
Example #8
Source File: test_autograd.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def train(net, epoch, ctx_list):
    net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx_list)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for i in range(epoch):
        train_data.reset()
        for batch in train_data:
            datas = gluon.utils.split_and_load(batch.data[0], ctx_list, batch_axis=0)
            labels = gluon.utils.split_and_load(batch.label[0], ctx_list, batch_axis=0)
            outputs = []
            with autograd.record():
                for x, y in zip(datas, labels):
                    z = net(x)
                    L = loss(z, y)
                    L.backward()
                    outputs.append(z)
            trainer.step(batch.data[0].shape[0])
            metric.update(labels, outputs)
        name, acc = metric.get()
        metric.reset()
        print('training acc at epoch %d: %s=%f'%(i, name, acc)) 
Example #9
Source File: test_gluon_trainer.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_trainer_sparse_save_load():
    x = gluon.Parameter('x', shape=(10, 1), lr_mult=1.0, stype='row_sparse')
    x.initialize(ctx=[mx.cpu(0)], init='zeros')
    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
    all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0))
    with mx.autograd.record():
        for w in x.list_row_sparse_data(all_rows):
            y = w * 1
            y.backward()
    trainer.step(1)
    assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.1
    trainer.save_states('test_trainer_sparse_save_load.states')
    trainer.load_states('test_trainer_sparse_save_load.states')
    x.lr_mult = 2.0
    # check if parameter dict is correctly associated with optimizer after load_state
    assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.2 
Example #10
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_constant():
    class Test(gluon.HybridBlock):
        def __init__(self, **kwargs):
            super(Test, self).__init__(**kwargs)
            self.value = np.asarray([[1,2], [3,4]])
            self.const = self.params.get_constant('const', self.value)

        def hybrid_forward(self, F, x, const):
            return x + const

    test = Test()
    test.initialize()
    trainer = gluon.Trainer(test.collect_params(), 'sgd',
                            {'learning_rate': 1.0, 'momentum': 0.5})

    with mx.autograd.record():
        x = mx.nd.ones((2,2))
        x.attach_grad()
        y = test(x)
        y.backward()

    trainer.step(1)

    assert (test.const.data().asnumpy() == test.value).all()
    assert (x.grad.asnumpy() == 1).all() 
Example #11
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_parameter_row_sparse_data():
    ctx0 = mx.cpu(1)
    ctx1 = mx.cpu(2)
    dim0 = 4
    x = gluon.Parameter('x', shape=(dim0, 2), stype='row_sparse')
    x.initialize(init='xavier', ctx=[ctx0, ctx1])
    trainer = gluon.Trainer([x], 'sgd')
    x_param = x._data[0].copy()
    assert x_param.stype == 'row_sparse'
    row_id_0 = mx.nd.array([0,1], ctx=ctx0)
    retained_0 = x.row_sparse_data(row_id_0)
    retained_target_0 = mx.nd.sparse.retain(x_param, row_id_0.as_in_context(ctx0))
    mx.test_utils.assert_almost_equal(retained_0.asnumpy(), retained_target_0.asnumpy())
    assert retained_0.context == ctx0
    row_id_1 = mx.nd.arange(0, dim0, ctx=ctx1)
    retained_1 = x.row_sparse_data(row_id_1)
    retained_target_1 = x_param
    mx.test_utils.assert_almost_equal(retained_1.asnumpy(), retained_target_1.asnumpy())
    assert retained_1.context == ctx1
    row_id_2 = mx.nd.array([0,1,2])
    retained_2 = x.list_row_sparse_data(row_id_2)
    retained_target_2 = mx.nd.sparse.retain(x_param, row_id_2.as_in_context(ctx0))
    mx.test_utils.assert_almost_equal(retained_2[0].asnumpy(), retained_target_2.asnumpy()) 
Example #12
Source File: test_gluon_autolog.py    From mlflow with Apache License 2.0
def test_autolog_ends_auto_created_run():
    mlflow.gluon.autolog()

    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")

    model = HybridSequential()
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()

    trainer = Trainer(model.collect_params(), "adam",
                      optimizer_params={"learning_rate": .001, "epsilon": 1e-07})
    est = estimator.Estimator(net=model, loss=SoftmaxCrossEntropyLoss(),
                              metrics=Accuracy(), trainer=trainer)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(data, epochs=3)

    assert mlflow.active_run() is None 
Example #13
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_sparse_parameter():
    p = gluon.Parameter('weight', shape=(10, 10), stype='row_sparse', grad_stype='row_sparse')
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    row_id = mx.nd.arange(0, 10, ctx=mx.cpu(1))
    assert len(p.list_grad()) == 2
    # getting row_sparse data without trainer throws an exception
    assertRaises(RuntimeError, p.list_row_sparse_data, row_id)
    trainer = mx.gluon.Trainer([p], 'sgd')
    assert len(p.list_row_sparse_data(row_id)) == 2
    weight = p.row_sparse_data(row_id)
    assert weight.context == mx.cpu(1)
    assert weight.shape == (10, 10)
    assert weight.stype == 'row_sparse'
    assert p.var().name == 'weight'
    assert p.var().attr('__storage_type__') == str(_STORAGE_TYPE_STR_TO_ID['row_sparse'])
    assert p.grad(mx.cpu(0)).stype == 'row_sparse'

    p.reset_ctx(ctx=[mx.cpu(1), mx.cpu(2)])
    assert p.list_ctx() == [mx.cpu(1), mx.cpu(2)] 
Example #14
Source File: test_gluon_autolog.py    From mlflow with Apache License 2.0
def gluon_random_data_run():
    mlflow.gluon.autolog()

    with mlflow.start_run() as run:
        data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")
        validation = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")

        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(model.collect_params(), "adam",
                          optimizer_params={"learning_rate": .001, "epsilon": 1e-07})
        est = estimator.Estimator(net=model, loss=SoftmaxCrossEntropyLoss(),
                                  metrics=Accuracy(), trainer=trainer)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3, val_data=validation)
    client = mlflow.tracking.MlflowClient()
    return client.get_run(run.info.run_id) 
Example #15
Source File: test_gluon_autolog.py    From mlflow with Apache License 2.0
def test_autolog_persists_manually_created_run():
    mlflow.gluon.autolog()

    data = DataLoader(LogsDataset(), batch_size=128, last_batch="discard")

    with mlflow.start_run() as run:

        model = HybridSequential()
        model.add(Dense(64, activation="relu"))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(10))
        model.initialize()
        model.hybridize()
        trainer = Trainer(model.collect_params(), "adam",
                          optimizer_params={"learning_rate": .001, "epsilon": 1e-07})
        est = estimator.Estimator(net=model, loss=SoftmaxCrossEntropyLoss(),
                                  metrics=Accuracy(), trainer=trainer)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(data, epochs=3)

        assert mlflow.active_run().info.run_id == run.info.run_id 
Example #16
Source File: kaggle_k_fold_cross_validation.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def train(net, X_train, y_train, epochs, verbose_epoch, learning_rate,
          weight_decay, batch_size):
    """Trains the model."""
    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size,
                                            shuffle=True)
    trainer = gluon.Trainer(net.collect_params(), 'adam',
                            {'learning_rate': learning_rate,
                             'wd': weight_decay})
    net.initialize(force_reinit=True)
    for epoch in range(epochs):
        for data, label in data_iter_train:
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            avg_loss = get_rmse_log(net, X_train, y_train)
        if epoch > verbose_epoch:
            print("Epoch %d, train loss: %f" % (epoch, avg_loss))
    return avg_loss 
Example #17
Source File: test_gluon_model_export.py    From mlflow with Apache License 2.0
def gluon_model(model_data):
    train_data, train_label, _ = model_data
    train_data_loader = DataLoader(list(zip(train_data, train_label)),
                                   batch_size=128, last_batch="discard")
    model = HybridSequential()
    model.add(Dense(128, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(10))
    model.initialize()
    model.hybridize()
    trainer = Trainer(model.collect_params(), "adam",
                      optimizer_params={"learning_rate": .001, "epsilon": 1e-07})
    est = estimator.Estimator(net=model, loss=SoftmaxCrossEntropyLoss(),
                              metrics=Accuracy(), trainer=trainer)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        est.fit(train_data_loader, epochs=3)
    return model 
Example #18
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_gluon_ch7(trainer_name, trainer_hyperparams, features, labels,
                    batch_size=10, num_epochs=2):
    """Train a linear regression model with a given Gluon trainer."""
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()

    def eval_loss():
        return loss(net(features), labels).mean().asscalar()

    ls = [eval_loss()]
    data_iter = gdata.DataLoader(
        gdata.ArrayDataset(features, labels), batch_size, shuffle=True)
    trainer = gluon.Trainer(net.collect_params(),
                            trainer_name, trainer_hyperparams)
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    set_figsize()
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss') 
Example #19
Source File: super_resolution.py    From SNIPER-mxnet with Apache License 2.0
def train(epoch, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    net.initialize(mx.init.Orthogonal(), ctx=ctx)
    # re-initialize conv4's weight to be Orthogonal
    net.conv4.initialize(mx.init.Orthogonal(scale=1), force_reinit=True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': opt.lr})
    loss = gluon.loss.L2Loss()

    for i in range(epoch):
        train_data.reset()
        for batch in train_data:
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            with ag.record():
                for x, y in zip(data, label):
                    z = net(x)
                    L = loss(z, y)
                    L.backward()
                    outputs.append(z)
            trainer.step(batch.data[0].shape[0])
            metric.update(label, outputs)

        name, acc = metric.get()
        metric.reset()
        print('training mse at epoch %d: %s=%f'%(i, name, acc))
        test(ctx)

    net.save_params('superres.params') 
Example #20
Source File: mnist.py    From SNIPER-mxnet with Apache License 2.0
def train(epochs, ctx):
    # Collect all parameters from net and its children, then initialize them.
    net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
    # Trainer is for updating parameters with gradient.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': opt.lr, 'momentum': opt.momentum})
    metric = mx.metric.Accuracy()
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    for epoch in range(epochs):
        # Reset the metric at the beginning of each epoch.
        metric.reset()
        for i, (data, label) in enumerate(train_data):
            # Copy data to ctx if necessary
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            # Start recording computation graph with record() section.
            # Recorded graphs can then be differentiated with backward.
            with autograd.record():
                output = net(data)
                L = loss(output, label)
                L.backward()
            # take a gradient step with batch_size equal to data.shape[0]
            trainer.step(data.shape[0])
            # Finally, update the metric.
            metric.update([label], [output])

            if i % opt.log_interval == 0 and i > 0:
                name, acc = metric.get()
                print('[Epoch %d Batch %d] Training: %s=%f'%(epoch, i, name, acc))

        name, acc = metric.get()
        print('[Epoch %d] Training: %s=%f'%(epoch, name, acc))

        name, val_acc = test(ctx)
        print('[Epoch %d] Validation: %s=%f'%(epoch, name, val_acc))

    net.save_params('mnist.params') 
Example #21
Source File: test_gluon_trainer.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_multi_trainer():
    x = gluon.Parameter('x', shape=(10,), stype='row_sparse')
    x.initialize()
    # test set trainer
    trainer0 = gluon.Trainer([x], 'sgd')
    assert(x._trainer is trainer0)
    # test unset trainer
    x._set_trainer(None)
    assert(x._trainer is None)
    x._set_trainer(trainer0)
    # multiple trainers for a sparse Parameter is not allowed
    trainer1 = gluon.Trainer([x], 'sgd') 
Example #22
Source File: enas_utils.py    From autogluon with Apache License 2.0
def init_default_train_args(batch_size, net, epochs, iters_per_epoch):
    train_args = {}
    base_lr = 0.1 * batch_size / 256
    lr_scheduler = gcv.utils.LRScheduler('cosine', base_lr=base_lr, target_lr=0.0001,
                                         nepochs=epochs, iters_per_epoch=iters_per_epoch)
    optimizer_params = {'wd': 1e-4, 'momentum': 0.9, 'lr_scheduler': lr_scheduler}
    train_args['trainer'] = gluon.Trainer(net.collect_params(), 'sgd', optimizer_params)
    train_args['batch_size'] = batch_size
    train_args['criterion'] = gluon.loss.SoftmaxCrossEntropyLoss()
    return train_args 
Example #23
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    """Train an Gluon RNN model and predict the next item in the sequence."""
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            params = [p.data() for p in model.collect_params().values()]
            grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx)) 
Example #24
Source File: dcgan.py    From training_results_v0.6 with Apache License 2.0
def get_configurations(netG, netD):
    # loss
    loss = gluon.loss.SoftmaxCrossEntropyLoss()

    # initialize the generator and the discriminator
    netG.initialize(mx.init.Normal(0.02), ctx=ctx)
    netD.initialize(mx.init.Normal(0.02), ctx=ctx)

    # trainer for the generator and the discriminator
    trainerG = gluon.Trainer(netG.collect_params(), 'adam', {'learning_rate': opt.lr, 'beta1': opt.beta1})
    trainerD = gluon.Trainer(netD.collect_params(), 'adam', {'learning_rate': opt.lr, 'beta1': opt.beta1})

    return loss, trainerG, trainerD 
Example #25
Source File: tabular_nn_model.py    From autogluon with Apache License 2.0
def setup_trainer(self, params, train_dataset=None):
        """ Set up optimizer needed for training.
            Network must first be initialized before this.
        """
        optimizer_opts = {'learning_rate': params['learning_rate'], 'wd': params['weight_decay'], 'clip_gradient': params['clip_gradient']}
        if 'lr_scheduler' in params and params['lr_scheduler'] is not None:
            if train_dataset is None:
                raise ValueError("train_dataset cannot be None when lr_scheduler is specified.")
            base_lr = params.get('base_lr', 1e-6)
            target_lr = params.get('target_lr', 1.0)
            warmup_epochs = params.get('warmup_epochs', 10)
            lr_decay = params.get('lr_decay', 0.1)
            lr_mode = params['lr_scheduler']
            num_batches = train_dataset.num_examples // params['batch_size']
            lr_decay_epoch = [max(warmup_epochs, int(params['num_epochs']/3)), max(warmup_epochs+1, int(params['num_epochs']/2)),
                              max(warmup_epochs+2, int(2*params['num_epochs']/3))]
            lr_scheduler = LRSequential([
                LRScheduler('linear', base_lr=base_lr, target_lr=target_lr, nepochs=warmup_epochs, iters_per_epoch=num_batches),
                LRScheduler(lr_mode, base_lr=target_lr, target_lr=base_lr, nepochs=params['num_epochs'] - warmup_epochs,
                            iters_per_epoch=num_batches, step_epoch=lr_decay_epoch, step_factor=lr_decay, power=2)
            ])
            optimizer_opts['lr_scheduler'] = lr_scheduler
        if params['optimizer'] == 'sgd':
            if 'momentum' in params:
                optimizer_opts['momentum'] = params['momentum']
            optimizer = gluon.Trainer(self.model.collect_params(), 'sgd', optimizer_opts)
        elif params['optimizer'] == 'adam':  # TODO: Can we try AdamW?
            optimizer = gluon.Trainer(self.model.collect_params(), 'adam', optimizer_opts)
        else:
            raise ValueError("Unknown optimizer specified: %s" % params['optimizer'])
        return optimizer 
Example #26
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_and_predict_rnn_gluon(model, num_hiddens, vocab_size, ctx,
                                corpus_indices, idx_to_char, char_to_idx,
                                num_epochs, num_steps, lr, clipping_theta,
                                batch_size, pred_period, pred_len, prefixes):
    """Train an Gluon RNN model and predict the next item in the sequence."""
    loss = gloss.SoftmaxCrossEntropyLoss()
    model.initialize(ctx=ctx, force_reinit=True, init=init.Normal(0.01))
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0, 'wd': 0})

    for epoch in range(num_epochs):
        l_sum, n, start = 0.0, 0, time.time()
        data_iter = data_iter_consecutive(
            corpus_indices, batch_size, num_steps, ctx)
        state = model.begin_state(batch_size=batch_size, ctx=ctx)
        for X, Y in data_iter:
            for s in state:
                s.detach()
            with autograd.record():
                (output, state) = model(X, state)
                y = Y.T.reshape((-1,))
                l = loss(output, y).mean()
            l.backward()
            params = [p.data() for p in model.collect_params().values()]
            grad_clipping(params, clipping_theta, ctx)
            trainer.step(1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d, perplexity %f, time %.2f sec' % (
                epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn_gluon(
                    prefix, pred_len, model, vocab_size, ctx, idx_to_char,
                    char_to_idx)) 
Example #27
Source File: train_script.py    From gluon-face with MIT License
def train():
    train_net.collect_params().reset_ctx(ctx)
    trainer = gluon.Trainer(train_net.collect_params(), optimizer, optimizer_params)

    metric = mx.metric.Accuracy()
    train_loss = mx.metric.Loss()

    metric.reset()
    train_loss.reset()
    sample_time = time.time()
    for iteration in range(1, int(num_iterations + 1)):
        Loss = SML if iteration < loss_warmup_iters else AFL
        batch = next(batch_generator)
        trans = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        labels = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)

        with autograd.record():
            outputs = [train_net(X.astype(dtype, copy=False))[1] for X in trans]
            losses = [Loss(yhat, y.astype(dtype, copy=False)) for yhat, y in zip(outputs, labels)]
        for loss in losses:
            loss.backward()
        trainer.step(batch_size)

        train_loss.update(0, losses)
        metric.update(labels, outputs)
        if iteration % opt.cat_interval == 0:
            num_samples = (opt.cat_interval * batch_size) // (time.time() - sample_time)
            _, train_acc = metric.get()
            _, epoch_loss = train_loss.get()
            metric.reset()
            train_loss.reset()
            epoch_str = ("Iter %d. Loss: %.5f, Train acc %f, %d samples/s."
                         % (iteration, epoch_loss, train_acc, num_samples))
            logger.info(epoch_str + ' lr ' + str(trainer.learning_rate))
            train_net.save_parameters("%s/%s-it-%d.params" % (opt.save_dir, opt.model, iteration))
            trainer.save_states("%s/%s-it-%d.states" % (opt.save_dir, opt.model, iteration))
            results = validate()
            for result in results:
                logger.info('{}'.format(result))
            sample_time = time.time() 
Example #28
Source File: train_cpu.py    From MXNet-Deep-Learning-in-Action with Apache License 2.0
def train(args, train_data, val_data, net):
    trainer = gluon.Trainer(params=net.collect_params(),
                            optimizer='sgd',
                            optimizer_params={'learning_rate': 0.05})
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    epoch_index = 0
    for epoch in range(args.num_epoch):
        train_loss = 0.0
        train_acc = 0.0
        val_acc = 0.0
        tic = time()
        for data, label in train_data:
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            train_loss += loss.mean().asscalar()
            trainer.step(args.batch_size)
            train_acc += acc(output, label)
        for data, label in val_data:
            val_acc += acc(net(data), label)
        epoch_index += 1
        if epoch_index % args.save_step == 0:
            net.save_parameters("{}-{}.params".format(args.save_prefix, epoch))
            print("save model to {}-{}.params".format(args.save_prefix, epoch))
        print("Epoch {}: Loss {:.4f}, Train accuracy {:.4f}, \
               Val accuracy {:.4f}, Time {:.4f}sec".format(epoch,
                                                   train_loss/len(train_data),
                                                   train_acc/(len(train_data)),
                                                   val_acc/(len(val_data)),
                                                   time()-tic)) 
Example #29
Source File: train_gpu.py    From MXNet-Deep-Learning-in-Action with Apache License 2.0
def train(args, train_data, val_data, net, ctx):
    trainer = gluon.Trainer(params=net.collect_params(),
                            optimizer='sgd',
                            optimizer_params={'learning_rate': 0.05})
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    epoch_index = 0
    for epoch in range(args.num_epoch):
        train_loss = 0.0
        train_acc = 0.0
        val_acc = 0.0
        tic = time()
        for data, label in train_data:
            data_part = gluon.utils.split_and_load(data, ctx)
            label_part = gluon.utils.split_and_load(label, ctx)
            with autograd.record():
                losses = [softmax_cross_entropy(net(data), label) for data, label in zip(data_part, label_part)]
            for loss_i in losses:
                loss_i.backward()
                train_loss += loss_i.mean().asscalar()
            trainer.step(args.batch_size)
            train_acc += sum([acc(net(data), label) for data, label in zip(data_part, label_part)])
        for data, label in val_data:
            data_part = gluon.utils.split_and_load(data, ctx)
            label_part = gluon.utils.split_and_load(label, ctx)
            val_acc += sum([acc(net(data), label) for data, label in zip(data_part, label_part)])
        epoch_index += 1
        if epoch_index % args.save_step == 0:
            net.save_parameters("{}-{}.params".format(args.save_prefix, epoch))
            print("save model to {}-{}.params".format(args.save_prefix, epoch))
        print("Epoch {}: Loss {:.4f}, Train accuracy {:.4f}, \
               Val accuracy {:.4f}, Time {:.4f}sec".format(epoch,
                                                   train_loss/len(train_data),
                                                   train_acc/(len(train_data)*len(ctx)),
                                                   val_acc/(len(val_data)*len(ctx)),
                                                   time()-tic)) 
Example #30
Source File: utils.py    From d2l-zh with Apache License 2.0
def train_gluon_ch7(trainer_name, trainer_hyperparams, features, labels,
                    batch_size=10, num_epochs=2):
    """Train a linear regression model with a given Gluon trainer."""
    net = nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=0.01))
    loss = gloss.L2Loss()

    def eval_loss():
        return loss(net(features), labels).mean().asscalar()

    ls = [eval_loss()]
    data_iter = gdata.DataLoader(
        gdata.ArrayDataset(features, labels), batch_size, shuffle=True)
    trainer = gluon.Trainer(net.collect_params(),
                            trainer_name, trainer_hyperparams)
    for _ in range(num_epochs):
        start = time.time()
        for batch_i, (X, y) in enumerate(data_iter):
            with autograd.record():
                l = loss(net(X), y)
            l.backward()
            trainer.step(batch_size)
            if (batch_i + 1) * batch_size % 100 == 0:
                ls.append(eval_loss())
    print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
    set_figsize()
    plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
    plt.xlabel('epoch')
    plt.ylabel('loss')