Python mxnet.gluon.Parameter() Examples

The following are 21 code examples of mxnet.gluon.Parameter(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions/classes of the module mxnet.gluon, or try the search function.
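For orientation, here is a minimal sketch of the core API that recurs throughout these examples (standard gluon.Parameter calls, nothing project-specific):

import mxnet as mx
from mxnet import gluon

# Create a named parameter, initialize it on one or more contexts,
# then inspect the per-context value and gradient buffers.
p = gluon.Parameter('weight', shape=(10, 10))
p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
print(p.data(mx.cpu(0)).shape)   # (10, 10)
print(len(p.list_grad()))        # one gradient buffer per context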
Example #1
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_sparse_parameter():
    p = gluon.Parameter('weight', shape=(10, 10), stype='row_sparse', grad_stype='row_sparse')
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    row_id = mx.nd.arange(0, 10, ctx=mx.cpu(1))
    assert len(p.list_grad()) == 2
    # getting row_sparse data without a trainer throws an exception
    # (assertRaises is imported from the test suite's common helpers in the original file)
    assertRaises(RuntimeError, p.list_row_sparse_data, row_id)
    trainer = mx.gluon.Trainer([p], 'sgd')
    assert len(p.list_row_sparse_data(row_id)) == 2
    weight = p.row_sparse_data(row_id)
    assert weight.context == mx.cpu(1)
    assert weight.shape == (10, 10)
    assert weight.stype == 'row_sparse'
    assert p.var().name == 'weight'
    # _STORAGE_TYPE_STR_TO_ID is defined in mxnet.ndarray.ndarray
    assert p.var().attr('__storage_type__') == str(_STORAGE_TYPE_STR_TO_ID['row_sparse'])
    assert p.grad(mx.cpu(0)).stype == 'row_sparse'

    p.reset_ctx(ctx=[mx.cpu(1), mx.cpu(2)])
    assert p.list_ctx() == [mx.cpu(1), mx.cpu(2)] 
Example #2
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_parameter_row_sparse_data():
    ctx0 = mx.cpu(1)
    ctx1 = mx.cpu(2)
    dim0 = 4
    x = gluon.Parameter('x', shape=(dim0, 2), stype='row_sparse')
    x.initialize(init='xavier', ctx=[ctx0, ctx1])
    trainer = gluon.Trainer([x], 'sgd')
    x_param = x._data[0].copy()
    assert x_param.stype == 'row_sparse'
    row_id_0 = mx.nd.array([0,1], ctx=ctx0)
    retained_0 = x.row_sparse_data(row_id_0)
    retained_target_0 = mx.nd.sparse.retain(x_param, row_id_0.as_in_context(ctx0))
    mx.test_utils.assert_almost_equal(retained_0.asnumpy(), retained_target_0.asnumpy())
    assert retained_0.context == ctx0
    row_id_1 = mx.nd.arange(0, dim0, ctx=ctx1)
    retained_1 = x.row_sparse_data(row_id_1)
    retained_target_1 = x_param
    mx.test_utils.assert_almost_equal(retained_1.asnumpy(), retained_target_1.asnumpy())
    assert retained_1.context == ctx1
    row_id_2 = mx.nd.array([0,1,2])
    retained_2 = x.list_row_sparse_data(row_id_2)
    retained_target_2 = mx.nd.sparse.retain(x_param, row_id_2.as_in_context(ctx0))
    mx.test_utils.assert_almost_equal(retained_2[0].asnumpy(), retained_target_2.asnumpy()) 
Example #3
Source File: utils.py    From autogluon with Apache License 2.0
def param_to_pretty_string(gluon_param, encoding):
    """
    Take a gluon parameter and transform it to a string amenable to plotting.
    If need be, the gluon parameter is appropriately encoded (e.g., log-exp transform).

    :param gluon_param: gluon parameter
    :param encoding: object in charge of encoding/decoding the gluon_param
    """

    assert isinstance(gluon_param, gluon.Parameter)
    assert encoding is not None, \
        "encoding of param {} should not be None".format(gluon_param.name)

    param_as_numpy = encode_unwrap_parameter(
        mx.nd, gluon_param, encoding).asnumpy()
    return "{}: {}".format(
        gluon_param.name, ";".join(
            "{:.6f}".format(value) for value in param_as_numpy)) 
Example #4
Source File: prob_base.py    From xfer with Apache License 2.0
def _load_params(filename):
        load_data = np.load(filename, allow_pickle=True)
        params = {}
        raw_params = {}
        for key in load_data.files:
            params[key] = load_data[key]
            raw_params[key] = []
            for parameter in params[key]:
                # The Parameter class in mxnet<1.3.0 does not have the _stype attribute, which causes an error
                # when a Parameter saved with mxnet<1.3.0 is loaded with mxnet==1.3.0
                try:
                    parameter._stype
                except AttributeError:
                    parameter._stype = 'default'
                raw_params[key].append(parameter.data())

        return params, raw_params 
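_load_params reads .npz archives whose entries are pickled lists of gluon Parameters. A plausible counterpart for the saving side, assumed here rather than taken from xfer, is simply:

import numpy as np

def _save_params(filename, params):
    # params: dict mapping name -> list of gluon.Parameter objects.
    # np.savez pickles the object arrays, matching the allow_pickle=True load above.
    np.savez(filename, **params)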
Example #5
Source File: prob_base.py    From xfer with Apache License 2.0
def _register_param(self, name, init, fix=False):
        """
        Register a set of parameters using "init".

        Its shape is equal to the shape of the variables over which the probability is defined (self.shapes).
        In addition to updating the parameters' dictionary (self.params), this function updates a dictionary with the
        value of the parameters (self.raw_params).

        :param string name: Name of the set of parameters.
        :param init: Define how to initialize the set of parameters
        :type init: mxnet.Initializer
        :param fix: define for each parameter whether it is kept fixed, i.e. its gradient is not computed (True),
         or trainable (False). If a single boolean is given, it is applied globally to the full set of variables.
        :type fix: boolean or list(boolean)
        """

        par_list = []
        grad_req = self._parse_grad_req(fix, len(self.shapes))
        for shape, gg in zip(self.shapes, grad_req):
            par = gluon.Parameter(name, shape=shape, init=init, grad_req=gg)
            par.initialize(ctx=self.ctx)
            par_list.append(par)
        self.params[name] = par_list
        self.raw_params[name] = [x.data(self.ctx) for x in self.params[name]] 
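The snippet depends on self._parse_grad_req, which is not shown. A plausible sketch of what it must do, judging from how the result feeds gluon.Parameter's grad_req argument:

def _parse_grad_req(fix, n):
    # Expand `fix` into one grad_req string per parameter:
    # 'null' disables gradient computation (fixed), 'write' enables it.
    if isinstance(fix, bool):
        fix = [fix] * n
    return ['null' if f else 'write' for f in fix]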
Example #6
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_parameter():
    p = gluon.Parameter('weight', shape=(10, 10))
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assert len(p.list_data()) == 2
    assert len(p.list_grad()) == 2
    assert p.data(mx.cpu(1)).context == mx.cpu(1)
    assert p.data(mx.cpu(0)).shape == (10, 10)
    assert p.var().name == 'weight'
    assert p.grad(mx.cpu(0)).stype == 'default'
    assert p.data(mx.cpu(0)).stype == 'default'

    p.reset_ctx(ctx=[mx.cpu(1), mx.cpu(2)])
    assert p.list_ctx() == [mx.cpu(1), mx.cpu(2)] 
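A parameter initialized on several contexts keeps one replica per context, and set_data broadcasts a new value to all of them; a quick sketch using the same standard API:

import mxnet as mx
from mxnet import gluon

p = gluon.Parameter('weight', shape=(2, 2))
p.initialize(ctx=[mx.cpu(0), mx.cpu(1)])
p.set_data(mx.nd.ones((2, 2)))                   # written to every replica
assert (p.data(mx.cpu(1)).asnumpy() == 1).all()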
Example #7
Source File: test_gluon.py    From SNIPER-mxnet with Apache License 2.0
def test_block_attr_param():
    b = gluon.Block()

    # regular variables can't change types; the original test presumably
    # expects a TypeError here via a raises decorator dropped from this excerpt
    b.b = gluon.Parameter()
    b.b = (2,) 
Example #8
Source File: test_gluon.py    From SNIPER-mxnet with Apache License 2.0
def test_trainer():
    def dict_equ(a, b):
        assert set(a) == set(b)
        for k in a:
            assert (a[k].asnumpy() == b[k].asnumpy()).all()
    x = gluon.Parameter('x', shape=(10,))
    x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)

    assert (x.data(mx.cpu(1)).asnumpy() == -2).all()

    x.lr_mult = 0.5

    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)

    assert (x.data(mx.cpu(1)).asnumpy() == -4).all()

    trainer.save_states('test.states')
    states = deepcopy(trainer._kvstore._updater.states) if trainer._update_on_kvstore \
             else deepcopy(trainer._updaters[0].states)
    trainer.load_states('test.states')
    if trainer._update_on_kvstore:
        dict_equ(trainer._kvstore._updater.states, states)
        assert trainer._optimizer == trainer._kvstore._updater.optimizer
    else:
        for updater in trainer._updaters:
            dict_equ(updater.states, states)
        assert trainer._optimizer == trainer._updaters[0].optimizer 
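Why -2 and then -4? Assuming MXNet's SGD-with-momentum rule (state = momentum * state + lr * grad; weight -= state) and that step(1) sums the all-ones gradients from the two contexts, the numbers check out:

# Hedged check of the asserted values.
state, x = 0.0, 0.0
lr, momentum, grad = 1.0, 0.5, 2.0     # grad = 1 per context, summed over 2
state = momentum * state + lr * grad   # 2.0
x -= state                             # -2.0  (first assert)
lr = 1.0 * 0.5                         # after x.lr_mult = 0.5
state = momentum * state + lr * grad   # 0.5*2 + 0.5*2 = 2.0
x -= state                             # -4.0  (second assert)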
Example #9
Source File: test_gluon.py    From SNIPER-mxnet with Apache License 2.0
def test_parameter():
    p = gluon.Parameter('weight', shape=(10, 10))
    p.initialize(init='xavier', ctx=[mx.cpu(0), mx.cpu(1)])
    assert len(p.list_data()) == 2
    assert len(p.list_grad()) == 2
    assert p.data(mx.cpu(1)).context == mx.cpu(1)
    assert p.data(mx.cpu(0)).shape == (10, 10)
    assert p.var().name == 'weight'

    p.reset_ctx(ctx=[mx.cpu(1), mx.cpu(2)])
    assert p.list_ctx() == [mx.cpu(1), mx.cpu(2)] 
Example #10
Source File: architecture.py    From coach with Apache License 2.0
def get_variable_value(self, variable: Union[gluon.Parameter, NDArray]) -> np.ndarray:
        """
        Get the value of a variable
        :param variable: the variable
        :return: the value of the variable
        """
        if isinstance(variable, gluon.Parameter):
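            # _reduce() is an internal gluon helper; in the gluon source of this era it
            # averages the parameter's per-context copies into a single CPU NDArray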
            variable = variable._reduce().asnumpy()
        if isinstance(variable, NDArray):
            return variable.asnumpy()
        return variable 
Example #11
Source File: prob_base.py    From xfer with Apache License 2.0
def _register_param_value(self, name, values, fix=False):
        """
        Register a parameter and initialize it with values (list of np.ndarray).

        It also checks whether the shape (given by "values") is compatible with the shape of the variables
        over which the probability is defined (self.shapes).

        :param string name: Name of the set of parameters.
        :param values: Value of the set of parameters.
                [np.array]: Parameters shared across all variables.
                [np.array_1, ..., np.array_n]: Different parameters for each variable W_i
        :type values: list[:class:`np.ndarray`]
        :param fix: define for each parameter whether it is kept fixed, i.e. its gradient is not computed (True),
         or trainable (False). If a single boolean is given, it is applied globally to the full set of variables.
        :type fix: boolean or list(boolean)
        """
        values_shapes = [vv.shape for vv in values]

        if not len(values) == 1:
            if not (len(values_shapes) == len(self.shapes)):
                raise ValueError("Parameter {} size is not compatible".format(name))
            for value_shape, parameter_shape in zip(values_shapes, self.shapes):
                if not (value_shape == parameter_shape or value_shape == (1, 1)):
                    raise ValueError("Parameter {} size is not compatible".format(name))
        else:
            if not values[0].shape == (1, 1):
                raise ValueError("Parameter {} size is not compatible".format(name))

        par_list = []
        grad_req = self._parse_grad_req(fix, len(values))
        for vv, gg in zip(values, grad_req):
            init = mx.init.Constant(vv)
            par = gluon.Parameter(name, shape=vv.shape, init=init, grad_req=gg)
            par.initialize(ctx=self.ctx)
            par_list.append(par)
        self.params[name] = par_list
        self.raw_params[name] = [x.data(self.ctx) for x in self.params[name]] 
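A hypothetical usage sketch, assuming a host object `prob` that defines self.shapes, self.ctx, self.params, self.raw_params and the _parse_grad_req helper: register a scalar-like parameter shared across all variables and keep it fixed.

import numpy as np

prob._register_param_value('mean', [np.zeros((1, 1))], fix=True)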
Example #12
Source File: test_gluon_trainer.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_trainer_save_load():
    x = gluon.Parameter('x', shape=(10,), lr_mult=1.0)
    x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)
    assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.1
    trainer.save_states('test_trainer_save_load.states')
    trainer.load_states('test_trainer_save_load.states')
    x.lr_mult = 2.0
    # check that the parameter dict is still correctly associated with the optimizer after
    # load_states: _get_lr(0) returns the base lr times lr_mult, i.e. 0.1 * 2.0 = 0.2
    assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.2 
Example #13
Source File: test_gluon_trainer.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_multi_trainer():
    x = gluon.Parameter('x', shape=(10,), stype='row_sparse')
    x.initialize()
    # test set trainer
    trainer0 = gluon.Trainer([x], 'sgd')
    assert(x._trainer is trainer0)
    # test unset trainer
    x._set_trainer(None)
    assert(x._trainer is None)
    x._set_trainer(trainer0)
    # multiple trainers for a sparse Parameter are not allowed
    trainer1 = gluon.Trainer([x], 'sgd') 
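    # this line raises RuntimeError; the original test presumably expects the failure via a raises decorator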
Example #14
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
(The code is identical to Example #7 above; the same test is vendored into this project.)
Example #15
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_invalid_parameter_grad_stype():
    p = gluon.Parameter('weight', shape=(10, 10), grad_stype='invalid') 
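    # expected to raise ValueError; the excerpt presumably omits the test's raises decorator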
Example #16
Source File: test_gluon.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_invalid_parameter_stype():
    p = gluon.Parameter('weight', shape=(10, 10), stype='invalid') 
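    # expected to raise ValueError; the excerpt presumably omits the test's raises decorator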
Example #17
Source File: main.py    From training_results_v0.6 with Apache License 2.0
def optimize(args):
    """    Gatys et al. CVPR 2017
    ref: Image Style Transfer Using Convolutional Neural Networks
    """
    if args.cuda:
        ctx = mx.gpu(0)
    else:
        ctx = mx.cpu(0)
    # load the content and style target
    content_image = utils.tensor_load_rgbimage(args.content_image, ctx, size=args.content_size, keep_asp=True)
    content_image = utils.subtract_imagenet_mean_preprocess_batch(content_image)
    style_image = utils.tensor_load_rgbimage(args.style_image, ctx, size=args.style_size)
    style_image = utils.subtract_imagenet_mean_preprocess_batch(style_image)
    # load the pre-trained vgg-16 and extract features
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)
    # content feature
    f_xc_c = vgg(content_image)[1]
    # style feature
    features_style = vgg(style_image)
    gram_style = [net.gram_matrix(y) for y in features_style]
    # output
    output = Parameter('output', shape=content_image.shape)
    output.initialize(ctx=ctx)
    output.set_data(content_image)
    # optimizer
    trainer = gluon.Trainer([output], 'adam',
                            {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    # optimizing the images
    for e in range(args.iters):
        utils.imagenet_clamp_batch(output.data(), 0, 255)
        # fix BN for pre-trained vgg
        with autograd.record():
            features_y = vgg(output.data())
            content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)
            style_loss = 0.
            for m in range(len(features_y)):
                gram_y = net.gram_matrix(features_y[m])
                gram_s = gram_style[m]
                style_loss = style_loss + 2 * args.style_weight * mse_loss(gram_y, gram_s)
            total_loss = content_loss + style_loss
            total_loss.backward()

        trainer.step(1)
        if (e + 1) % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # save the image
    output = utils.add_imagenet_mean_batch(output.data())
    utils.tensor_save_bgrimage(output[0], args.output_image, args.cuda) 
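The key idea above is that the image itself is a gluon Parameter, so a Trainer can update its pixels by gradient descent. The trick in isolation, as a minimal sketch with standard gluon calls:

import mxnet as mx
from mxnet import gluon, autograd

img = gluon.Parameter('img', shape=(1, 3, 64, 64))
img.initialize(init=mx.init.Zero(), ctx=mx.cpu())
trainer = gluon.Trainer([img], 'adam', {'learning_rate': 0.1})
target = mx.nd.ones((1, 3, 64, 64))
loss_fn = gluon.loss.L2Loss()
with autograd.record():
    loss = loss_fn(img.data(), target)
loss.backward()
trainer.step(1)   # nudges the pixels toward `target`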
Example #18
Source File: test_gluon_trainer.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def test_trainer():
    def dict_equ(a, b):
        assert set(a) == set(b)
        for k in a:
            assert (a[k].asnumpy() == b[k].asnumpy()).all()
    x = gluon.Parameter('x', shape=(10,))
    x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)

    assert (x.data(mx.cpu(1)).asnumpy() == -2).all()

    x.lr_mult = 0.5

    with mx.autograd.record():
        for w in x.list_data():
            y = w + 1
            y.backward()
    trainer.step(1)

    assert (x.data(mx.cpu(1)).asnumpy() == -4).all()

    trainer.save_states('test_trainer.states')
    states = deepcopy(trainer._kvstore._updater.states) if trainer._update_on_kvstore \
             else deepcopy(trainer._updaters[0].states)
    trainer.load_states('test_trainer.states')
    if trainer._update_on_kvstore:
        dict_equ(trainer._kvstore._updater.states, states)
        assert trainer._optimizer == trainer._kvstore._updater.optimizer
    else:
        for updater in trainer._updaters:
            dict_equ(updater.states, states)
        assert trainer._optimizer == trainer._updaters[0].optimizer
    assert_raises(AssertionError, trainer.update, 1)
    assert_raises(AssertionError, trainer.allreduce_grads)

    x = gluon.Parameter('x', shape=(10,))
    x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
    trainer2 = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5},
                             update_on_kvstore=False)
    with mx.autograd.record():
        for i, w in enumerate(x.list_data()):
            y = i*w
            y.backward()
    assert (x.grad(mx.cpu(0)).asnumpy() != x.grad(mx.cpu(1)).asnumpy()).all()
    trainer2.allreduce_grads()
    assert (x.grad(mx.cpu(0)).asnumpy() == x.grad(mx.cpu(1)).asnumpy()).all()
    trainer2.update(1)

    assert (x.data(mx.cpu(1)).asnumpy() == -1).all(), x.data(mx.cpu(1)).asnumpy() 
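For the final assertion: allreduce_grads sums the per-device gradients (0 on cpu(0), 1 on cpu(1)) and writes the sum back to every device, so both replicas see a gradient of 1; assuming the same SGD momentum rule as in Example #8, the first update is then 1.0 * 1 = 1 and x lands at -1.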
Example #19
Source File: main.py    From MXNet-Gluon-Style-Transfer with MIT License
(The code is identical to Example #17 above; the same optimize function appears verbatim in this project.)
Example #20
Source File: main.py    From SNIPER-mxnet with Apache License 2.0
(The code is identical to Example #17 above; the same optimize function appears verbatim in this project.)
Example #21
Source File: main.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
(The code is identical to Example #17 above; the same optimize function appears verbatim in this project.)