Python tensorflow.GradientTape() Examples

The following are 30 code examples of tensorflow.GradientTape(), gathered from open-source projects. The originating project and source file are noted above each example, so you can follow them back to their full context. You may also want to check out all available functions and classes of the tensorflow module.
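Before the project examples, here is a minimal, self-contained sketch of the canonical tf.GradientTape() training-step pattern that most of the examples below follow. The model, optimizer, loss, and data are illustrative placeholders, not taken from any specific example.

import tensorflow as tf

# Placeholder model, optimizer, loss, and data for illustration only.
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()
x = tf.random.normal((32, 4))
y = tf.random.normal((32, 1))

with tf.GradientTape() as tape:
    predictions = model(x, training=True)        # forward pass is recorded on the tape
    loss = loss_fn(y, predictions)
gradients = tape.gradient(loss, model.trainable_variables)   # backprop through recorded ops
optimizer.apply_gradients(zip(gradients, model.trainable_variables))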
Example #1
Source File: classifier.py    From ashpy with Apache License 2.0
def train_step(self, features, labels):
        """
        Train step.

        Args:
            features: Input features.
            labels: The labels.

        Returns:
            Loss value.

        """
        with tf.GradientTape() as tape:
            loss = self._loss(
                self._context, features=features, labels=labels, training=True
            )

        gradients = tape.gradient(loss, self._model.trainable_variables)
        self._optimizer.apply_gradients(zip(gradients, self._model.trainable_variables))
        return loss 
Example #2
Source File: train.py    From graphics with Apache License 2.0
def wrapped_tf_function(points, label):
  """Performs one step of minimization of the loss."""
  # --- subsampling (order DOES matter)
  points = points[0:FLAGS.num_points, ...]

  # --- augmentation
  if FLAGS.augment:
    points = tf.map_fn(augment.rotate, points)
    points = augment.jitter(points)

  # --- training
  with tf.GradientTape() as tape:
    logits = model(points, training=True)
    loss = model.loss(label, logits)
  variables = model.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))
  return loss 
Example #3
Source File: gaussian_process_test.py    From BERT with Apache License 2.0
def testSparseGaussianProcess(self):
    dataset_size = 10
    batch_size = 3
    input_dim = 4
    output_dim = 5
    features = tf.to_float(np.random.rand(batch_size, input_dim))
    labels = tf.to_float(np.random.rand(batch_size, output_dim))
    model = gaussian_process.SparseGaussianProcess(output_dim, num_inducing=2)
    with tf.GradientTape() as tape:
      predictions = model(features)
      nll = -tf.reduce_mean(predictions.distribution.log_prob(labels))
      kl = sum(model.losses) / dataset_size
      loss = nll + kl

    self.evaluate(tf.global_variables_initializer())
    grads = tape.gradient(nll, model.variables)
    for grad in grads:
      self.assertIsNotNone(grad)

    loss_val, predictions_val = self.evaluate([loss, predictions])
    self.assertEqual(loss_val.shape, ())
    self.assertGreaterEqual(loss_val, 0.)
    self.assertEqual(predictions_val.shape, (batch_size, output_dim)) 
Example #4
Source File: net_work.py    From face_landmark with Apache License 2.0
def train_step(self, inputs):
    """One train step.
    Args:
      inputs: one batch input.
    Returns:
      loss: Scaled loss.
    """

    image, label = inputs
    with tf.GradientTape() as tape:
      predictions = self.model(image, training=True)

      loss = self.compute_loss(predictions, label, training=True)

    gradients = tape.gradient(loss, self.model.trainable_variables)
    gradients = [(tf.clip_by_value(grad, -5.0, 5.0))
                 for grad in gradients]
    self.optimizer.apply_gradients(zip(gradients,
                                       self.model.trainable_variables))

    return loss 
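Example #4 clips each gradient element-wise with tf.clip_by_value. A common alternative is joint clipping with tf.clip_by_global_norm; the sketch below is illustrative only, with a placeholder model and data.

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.SGD(0.01)
x = tf.random.normal((8, 4))
y = tf.random.normal((8, 1))

with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x, training=True) - y))
gradients = tape.gradient(loss, model.trainable_variables)
gradients, _ = tf.clip_by_global_norm(gradients, 5.0)   # clip the joint norm of all gradients
optimizer.apply_gradients(zip(gradients, model.trainable_variables))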
Example #5
Source File: training.py    From nlp-journey with Apache License 2.0
def train_step(self, inp, tar):
        tar_inp = tar[:, :-1]
        tar_real = tar[:, 1:]

        enc_padding_mask, combined_mask, dec_padding_mask = self.mask_encoder.create_masks(inp, tar_inp)

        with tf.GradientTape() as tape:
            predictions, _ = self.transformer(inp, tar_inp,
                                              True,
                                              enc_padding_mask,
                                              combined_mask,
                                              dec_padding_mask)
            loss = self.loss_function(tar_real, predictions)

        gradients = tape.gradient(loss, self.transformer.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.transformer.trainable_variables))

        self.train_loss(loss)
        self.train_accuracy(tar_real, predictions) 
Example #6
Source File: gaifo.py    From tf2rl with MIT License
def _train_body(self, agent_states, agent_next_states, expert_states, expert_next_states):
        epsilon = 1e-8
        with tf.device(self.device):
            with tf.GradientTape() as tape:
                real_logits = self.disc([expert_states, expert_next_states])
                fake_logits = self.disc([agent_states, agent_next_states])
                loss = -(tf.reduce_mean(tf.math.log(real_logits + epsilon)) +
                         tf.reduce_mean(tf.math.log(1. - fake_logits + epsilon)))
            grads = tape.gradient(loss, self.disc.trainable_variables)
            self.optimizer.apply_gradients(
                zip(grads, self.disc.trainable_variables))

        accuracy = \
            tf.reduce_mean(tf.cast(real_logits >= 0.5, tf.float32)) / 2. + \
            tf.reduce_mean(tf.cast(fake_logits < 0.5, tf.float32)) / 2.
        js_divergence = self._compute_js_divergence(
            fake_logits, real_logits)
        return loss, accuracy, js_divergence 
Example #7
Source File: dqn.py    From tf2rl with MIT License
def _train_body(self, states, actions, next_states, rewards, done, weights):
        with tf.device(self.device):
            with tf.GradientTape() as tape:
                if self._enable_categorical_dqn:
                    td_errors = self._compute_td_error_body_distributional(
                        states, actions, next_states, rewards, done)
                    q_func_loss = tf.reduce_mean(
                        huber_loss(tf.negative(td_errors),
                                   delta=self.max_grad) * weights)
                else:
                    td_errors = self._compute_td_error_body(
                        states, actions, next_states, rewards, done)
                    q_func_loss = tf.reduce_mean(
                        huber_loss(td_errors,
                                   delta=self.max_grad) * weights)

            q_func_grad = tape.gradient(
                q_func_loss, self.q_func.trainable_variables)
            self.q_func_optimizer.apply_gradients(
                zip(q_func_grad, self.q_func.trainable_variables))

            return td_errors, q_func_loss 
Example #8
Source File: hardshrink_test.py    From addons with Apache License 2.0
def verify_funcs_are_equivalent(dtype):
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)
    lower = np.random.uniform(-10, 10)
    upper = lower + np.random.uniform(0, 10)

    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = _hardshrink_custom_op(x, lower, upper)
        y_py = _hardshrink_py(x, lower, upper)

    test_utils.assert_allclose_according_to_type(y_native, y_py)

    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)

    test_utils.assert_allclose_according_to_type(grad_native, grad_py) 
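Example #8 uses a persistent tape and t.watch(x) because x is a plain tensor (not a tf.Variable) and the tape is queried twice. A minimal sketch of both ideas, with illustrative values:

import tensorflow as tf

x = tf.constant(3.0)                    # constant tensors are not watched automatically
with tf.GradientTape(persistent=True) as t:
    t.watch(x)                          # explicitly record operations involving x
    y1 = x * x
    y2 = x * x * x
dy1 = t.gradient(y1, x)                 # 6.0
dy2 = t.gradient(y2, x)                 # 27.0 -- second call only works because persistent=True
del t                                   # free tape resources when done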
Example #9
Source File: gail.py    From tf2rl with MIT License
def _train_body(self, agent_states, agent_acts, expert_states, expert_acts):
        epsilon = 1e-8
        with tf.device(self.device):
            with tf.GradientTape() as tape:
                real_logits = self.disc([expert_states, expert_acts])
                fake_logits = self.disc([agent_states, agent_acts])
                loss = -(tf.reduce_mean(tf.math.log(real_logits + epsilon)) +
                         tf.reduce_mean(tf.math.log(1. - fake_logits + epsilon)))
            grads = tape.gradient(loss, self.disc.trainable_variables)
            self.optimizer.apply_gradients(
                zip(grads, self.disc.trainable_variables))

        accuracy = \
            tf.reduce_mean(tf.cast(real_logits >= 0.5, tf.float32)) / 2. + \
            tf.reduce_mean(tf.cast(fake_logits < 0.5, tf.float32)) / 2.
        js_divergence = self._compute_js_divergence(
            fake_logits, real_logits)
        return loss, accuracy, js_divergence 
Example #10
Source File: softshrink_test.py    From addons with Apache License 2.0
def verify_funcs_are_equivalent(dtype):
    x_np = np.random.uniform(-10, 10, size=(4, 4)).astype(dtype)
    x = tf.convert_to_tensor(x_np)
    lower = np.random.uniform(-10, 10)
    upper = lower + np.random.uniform(0, 10)

    with tf.GradientTape(persistent=True) as t:
        t.watch(x)
        y_native = softshrink(x, lower, upper)
        y_py = _softshrink_py(x, lower, upper)

    test_utils.assert_allclose_according_to_type(y_native, y_py)

    grad_native = t.gradient(y_native, x)
    grad_py = t.gradient(y_py, x)

    test_utils.assert_allclose_according_to_type(grad_native, grad_py) 
Example #11
Source File: interpolate_spline_test.py    From addons with Apache License 2.0
def test_interpolation_gradient():
    """Correctness of gradients is assumed. We compute them
    and check they exist.
    """
    tp = _QuadraticPlusSinProblemND()
    (query_points, _, train_points, train_values) = tp.get_problem(optimizable=True)

    regularization = 0.001
    for interpolation_order in (1, 2, 3, 4):

        with tf.GradientTape() as g:
            interpolator = interpolate_spline(
                train_points,
                train_values,
                query_points,
                interpolation_order,
                regularization,
            )

        gradients = g.gradient(interpolator, train_points).numpy()
        assert np.sum(np.abs(gradients)) != 0 
Example #12
Source File: gan.py    From deepchem with MIT License
def call(self, inputs, conditional_inputs):
    with tf.GradientTape() as tape:
      for layer in inputs:
        tape.watch(layer)
      output = self.discriminator(_list_or_tensor(inputs + conditional_inputs))
    gradients = tape.gradient(output, inputs)
    gradients = [g for g in gradients if g is not None]
    if len(gradients) > 0:
      norm2 = 0.0
      for g in gradients:
        g2 = tf.square(g)
        dims = len(g.shape)
        if dims > 1:
          g2 = tf.reduce_sum(g2, axis=list(range(1, dims)))
        norm2 += g2
      penalty = tf.square(tf.sqrt(norm2) - 1.0)
      penalty = self.gan.gradient_penalty * tf.reduce_mean(penalty)
    else:
      penalty = 0.0
    return [output, penalty] 
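Example #12 differentiates the discriminator output with respect to its inputs (for a gradient penalty), which requires watching the input tensors. A stripped-down sketch with a placeholder layer:

import tensorflow as tf

dense = tf.keras.layers.Dense(1)
inputs = tf.random.normal((5, 3))
with tf.GradientTape() as tape:
    tape.watch(inputs)                  # inputs are plain tensors, so watch them explicitly
    outputs = dense(inputs)
input_grads = tape.gradient(outputs, inputs)   # same shape as inputs: (5, 3)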
Example #13
Source File: main.py    From Fast-SRGAN with MIT License
def pretrain_step(model, x, y):
    """
    Single step of generator pre-training.
    Args:
        model: A model object with a tf keras compiled generator.
        x: The low resolution image tensor.
        y: The high resolution image tensor.
    """
    with tf.GradientTape() as tape:
        fake_hr = model.generator(x)
        loss_mse = tf.keras.losses.MeanSquaredError()(y, fake_hr)

    grads = tape.gradient(loss_mse, model.generator.trainable_variables)
    model.gen_optimizer.apply_gradients(zip(grads, model.generator.trainable_variables))

    return loss_mse 
Example #14
Source File: quantizers_test.py    From larq with Apache License 2.0
def test_swish_grad(self):
        def swish_grad(x, beta):
            return (
                beta * (2 - beta * x * np.tanh(beta * x / 2)) / (1 + np.cosh(beta * x))
            )

        x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
        tf_x = tf.Variable(x)
        with tf.GradientTape() as tape:
            activation = lq.quantizers.SwishSign()(tf_x)
        grad = tape.gradient(activation, tf_x)
        np.testing.assert_allclose(grad.numpy(), swish_grad(x, beta=5.0))

        with tf.GradientTape() as tape:
            activation = lq.quantizers.SwishSign(beta=10.0)(tf_x)
        grad = tape.gradient(activation, tf_x)
        np.testing.assert_allclose(grad.numpy(), swish_grad(x, beta=10.0)) 
Example #15
Source File: maml.py    From deepchem with MIT License
def _compute_meta_loss(self, inputs, inputs2, variables):
    """This is called during fitting to compute the meta-loss (the loss after a
    few steps of optimization), and its gradient.
    """
    updated_variables = variables
    with tf.GradientTape() as meta_tape:
      for k in range(self.optimization_steps):
        with tf.GradientTape() as tape:
          loss, _ = self.learner.compute_model(inputs, updated_variables, True)
        gradients = tape.gradient(loss, updated_variables)
        updated_variables = [
            v if g is None else v - self.learning_rate * g
            for v, g in zip(updated_variables, gradients)
        ]
      meta_loss, _ = self.learner.compute_model(inputs2, updated_variables,
                                                True)
    meta_gradients = meta_tape.gradient(meta_loss, variables)
    return meta_loss, meta_gradients 
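Example #15 nests one tape inside another so the meta-gradient can flow through the inner optimization steps. A simplified, hypothetical illustration of differentiating through a single inner update:

import tensorflow as tf

w = tf.Variable(2.0)
lr = 0.1
with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        inner_loss = w * w
    g = inner_tape.gradient(inner_loss, w)      # inner gradient, recorded by the outer tape
    w_updated = w - lr * g                      # one differentiable "inner" update
    meta_loss = w_updated * w_updated
meta_grad = outer_tape.gradient(meta_loss, w)   # gradient through the inner update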
Example #16
Source File: maml.py    From deepchem with MIT License
def train_on_current_task(self, optimization_steps=1, restore=True):
    """Perform a few steps of gradient descent to fine tune the model on the current task.

    Parameters
    ----------
    optimization_steps: int
      the number of steps of gradient descent to perform
    restore: bool
      if True, restore the model from the most recent checkpoint before optimizing
    """
    if restore:
      self.restore()
    variables = self.learner.variables
    for i in range(optimization_steps):
      inputs = self.learner.get_batch()
      with tf.GradientTape() as tape:
        loss, _ = self.learner.compute_model(inputs, variables, True)
      gradients = tape.gradient(loss, variables)
      self._tf_task_optimizer.apply_gradients(zip(gradients, variables)) 
Example #17
Source File: keras_model.py    From deepchem with MIT License
def _create_gradient_fn(self, variables):
    """Create a function that computes gradients and applies them to the model.
    Because of the way TensorFlow function tracing works, we need to create a
    separate function for each new set of variables.
    """

    @tf.function(experimental_relax_shapes=True)
    def apply_gradient_for_batch(inputs, labels, weights, loss):
      with tf.GradientTape() as tape:
        outputs = self.model(inputs, training=True)
        if isinstance(outputs, tf.Tensor):
          outputs = [outputs]
        if self._loss_outputs is not None:
          outputs = [outputs[i] for i in self._loss_outputs]
        batch_loss = loss(outputs, labels, weights)
      if variables is None:
        vars = self.model.trainable_variables
      else:
        vars = variables
      grads = tape.gradient(batch_loss, vars)
      self._tf_optimizer.apply_gradients(zip(grads, vars))
      self._global_step.assign_add(1)
      return batch_loss

    return apply_gradient_for_batch 
Example #18
Source File: dense_image_warp_test.py    From addons with Apache License 2.0
def test_gradients_exist():
    """Check that backprop can run.

    The correctness of the gradients is assumed, since the forward
    propagation is tested to be correct and we only use built-in tf
    ops. However, we perform a simple test to make sure that
    backprop can actually run.
    """
    batch_size, height, width, num_channels = [4, 5, 6, 7]
    image_shape = [batch_size, height, width, num_channels]
    image = tf.random.normal(image_shape)
    flow_shape = [batch_size, height, width, 2]
    flows = tf.Variable(tf.random.normal(shape=flow_shape) * 0.25, dtype=tf.float32)

    with tf.GradientTape() as t:
        interp = dense_image_warp(image, flows)

    grads = t.gradient(interp, flows).numpy()
    assert np.sum(np.abs(grads)) != 0 
Example #19
Source File: blocks.py    From BERT with Apache License 2.0
def backward_grads_and_vars(self, x, y, dy, training=True):
        """Apply reversible block backward to outputs."""

        grads_all = []
        vars_all = []

        for i in reversed(range(len(self.blocks))):
            block = self.blocks[i]
            if i == 0:
                # First block usually contains downsampling that can't be reversed
                with tf.GradientTape() as tape:
                    x = tf.identity(x)
                    tape.watch(x)
                    y = block(x, training=training)

                    grads_combined = tape.gradient(
                        y, [x] + block.trainable_variables, output_gradients=dy)
                    dy = grads_combined[0]
                    grads_all += grads_combined[1:]
                    vars_all += block.trainable_variables
            else:
                y, dy, grads, vars_ = block.backward_grads_and_vars(
                    y, dy, training=training)
                grads_all += grads
                vars_all += vars_

        return dy, grads_all, vars_all 
Example #20
Source File: blocks.py    From BERT with Apache License 2.0
def backward_grads_and_vars(self, x, y, dy, training=True):
        """Apply reversible block backward to outputs."""

        grads_all = []
        vars_all = []

        for i in reversed(range(len(self.blocks))):
            block = self.blocks[i]
            if i == 0:
                # First block usually contains downsampling that can't be reversed
                with tf.GradientTape() as tape:
                    x = tf.identity(x)
                    tape.watch(x)
                    y = block(x, training=training)

                    grads_combined = tape.gradient(
                        y, [x] + block.trainable_variables, output_gradients=dy)
                    dy = grads_combined[0]
                    grads_all += grads_combined[1:]
                    vars_all += block.trainable_variables
            else:
                y, dy, grads, vars_ = block.backward_grads_and_vars(
                    y, dy, training=training)
                grads_all += grads
                vars_all += vars_

        return dy, grads_all, vars_all 
Example #21
Source File: quantizers_test.py    From larq with Apache License 2.0
def test_approx_sign_grad(self):
        @np.vectorize
        def approx_sign_grad(x):
            if np.abs(x) <= 1:
                return 2 - 2 * np.abs(x)
            return 0.0

        x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
        tf_x = tf.Variable(x)
        with tf.GradientTape() as tape:
            activation = lq.quantizers.ApproxSign()(tf_x)
        grad = tape.gradient(activation, tf_x)
        np.testing.assert_allclose(grad.numpy(), approx_sign_grad(x)) 
Example #22
Source File: quantizers_test.py    From larq with Apache License 2.0
def test_magnitude_aware_sign_grad(self):
        a = np.random.uniform(-2, 2, (3, 2, 2, 3))
        x = tf.Variable(a)
        with tf.GradientTape() as tape:
            y = lq.quantizers.MagnitudeAwareSign()(x)
        grad = tape.gradient(y, x)

        scale_vector = [
            np.mean(np.reshape(np.abs(a[:, :, :, i]), [-1])) for i in range(3)
        ]

        np.testing.assert_allclose(
            grad.numpy(), np.where(abs(a) < 1, np.ones(a.shape) * scale_vector, 0)
        ) 
Example #23
Source File: blocks.py    From BERT with Apache License 2.0
def backward_grads_and_vars(self, y, dy, training=True):
        """Manually compute backward gradients given input and output grads."""
        dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=self.axis)

        with tf.GradientTape(persistent=True) as tape:
            y = tf.identity(y)
            tape.watch(y)
            y1, y2 = tf.split(y, num_or_size_splits=2, axis=self.axis)
            z1 = y1
            gz1 = self.g(z1, training=training)
            x2 = y2 - gz1
            fx2 = self.f(x2, training=training)
            x1 = z1 - fx2

            grads_combined = tape.gradient(
                gz1, [z1] + self.g.trainable_variables, output_gradients=dy2)
            dz1 = dy1 + grads_combined[0]
            dg = grads_combined[1:]
            dx1 = dz1

            grads_combined = tape.gradient(
                fx2, [x2] + self.f.trainable_variables, output_gradients=dz1)
            dx2 = dy2 + grads_combined[0]
            df = grads_combined[1:]

            del tape

        grads = df + dg
        vars_ = self.f.trainable_variables + self.g.trainable_variables

        x = tf.concat([x1, x2], axis=self.axis)
        dx = tf.concat([dx1, dx2], axis=self.axis)

        return x, dx, grads, vars_ 
Example #24
Source File: quantizers_test.py    From larq with Apache License 2.0
def test_ste_grad(self, fn):
        @np.vectorize
        def ste_grad(x):
            if np.abs(x) <= 1:
                return 1.0
            return 0.0

        x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
        tf_x = tf.Variable(x)
        with tf.GradientTape() as tape:
            activation = fn(tf_x)
        grad = tape.gradient(activation, tf_x)
        np.testing.assert_allclose(grad.numpy(), ste_grad(x))

Example #25
Source File: tf2_hisan.py    From Projects with MIT License
def _train_step(self, text, labels):
        with tf.GradientTape() as tape:
            predictions = self.model(text, training=True)
            loss = self.loss_object(labels, predictions)
        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        return predictions, loss
Example #26
Source File: train_model_util_TensorFlow.py    From Awesome-RecSystem-Models with MIT License
def train_one_step(model, optimizer, idx, value, label):
    with tf.GradientTape() as tape:
        output = model(idx, value)
        loss = cross_entropy_loss(y_true=label, y_pred=output)

        reg_loss = []
        for p in model.trainable_variables:
            reg_loss.append(tf.nn.l2_loss(p))
        reg_loss = tf.reduce_sum(tf.stack(reg_loss))
        loss = loss + model.reg_l2 * reg_loss

    grads = tape.gradient(loss, model.trainable_variables)
    grads = [tf.clip_by_norm(g, 100) for g in grads]
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
    return loss 
Example #27
Source File: levenberg_marquardt.py    From graphics with Apache License 2.0
def _values_and_jacobian(residuals, variables):
  """Computes the residual values and the Jacobian matrix.

  Args:
    residuals: A list of residuals.
    variables: A list of variables.

  Returns:
    The residual values and the Jacobian matrix.
  """

  def _compute_residual_values(residuals, variables):
    """Computes the residual values."""
    return tf.concat([
        tf.reshape(residual(*variables), shape=(-1,)) for residual in residuals
    ],
                     axis=-1)

  def _compute_jacobian(values, variables, tape):
    """Computes the Jacobian matrix."""
    jacobians = tape.jacobian(
        values, variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    return tf.concat([
        tf.reshape(jacobian, shape=(tf.shape(input=jacobian)[0], -1))
        for jacobian in jacobians
    ],
                     axis=-1)

  with tf.GradientTape(watch_accessed_variables=False, persistent=True) as tape:
    for variable in variables:
      tape.watch(variable)
    values = _compute_residual_values(residuals, variables)
  jacobian = _compute_jacobian(values, variables, tape)
  del tape
  values = tf.expand_dims(values, axis=-1)
  return values, jacobian 
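Example #27 uses tape.jacobian rather than tape.gradient to obtain the full Jacobian matrix of the residuals. A small sketch on a watched tensor, with illustrative shapes:

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
with tf.GradientTape(watch_accessed_variables=False, persistent=True) as tape:
    tape.watch(x)
    y = x * x                           # element-wise square, shape (3,)
jacobian = tape.jacobian(y, x)          # shape (3, 3): diagonal matrix with 2*x on the diagonal
del tape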
Example #28
Source File: quantizers_test.py    From larq with Apache License 2.0
def test_identity_ste_grad(self, fn):
        x = testing_utils.generate_real_values_with_zeros(shape=(8, 3, 3, 16))
        tf_x = tf.Variable(x)
        with tf.GradientTape() as tape:
            activation = fn(tf_x)
        grad = tape.gradient(activation, tf_x)
        np.testing.assert_allclose(grad.numpy(), np.ones_like(x)) 
Example #29
Source File: weight_decay_optimizers.py    From addons with Apache License 2.0
def minimize(self, loss, var_list, grad_loss=None, name=None, decay_var_list=None):
        """Minimize `loss` by updating `var_list`.

        This method simply computes gradient using `tf.GradientTape` and calls
        `apply_gradients()`. If you want to process the gradient before
        applying then call `tf.GradientTape` and `apply_gradients()` explicitly
        instead of using this function.

        Args:
            loss: A callable taking no arguments which returns the value to
                minimize.
            var_list: list or tuple of `Variable` objects to update to
                minimize `loss`, or a callable returning the list or tuple of
                `Variable` objects. Use callable when the variable list would
                otherwise be incomplete before `minimize` since the variables
                are created at the first time `loss` is called.
            grad_loss: Optional. A `Tensor` holding the gradient computed for
                `loss`.
            decay_var_list: Optional list of variables to be decayed. Defaults
                to all variables in var_list.
            name: Optional name for the returned operation.
        Returns:
            An Operation that updates the variables in `var_list`.
        Raises:
            ValueError: If some of the variables are not `Variable` objects.
        """
        self._decay_var_list = (
            set([_ref(v) for v in decay_var_list]) if decay_var_list else False
        )
        return super().minimize(loss, var_list=var_list, grad_loss=grad_loss, name=name) 
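The docstring of Example #29 notes that minimize simply computes gradients with tf.GradientTape and then calls apply_gradients. Spelled out explicitly (a hedged sketch with a placeholder optimizer, variable, and loss callable):

import tensorflow as tf

opt = tf.keras.optimizers.SGD(learning_rate=0.01)
var = tf.Variable(1.0)
loss_fn = lambda: var ** 2              # `loss` must be a callable taking no arguments

with tf.GradientTape() as tape:
    loss = loss_fn()
grads = tape.gradient(loss, [var])
opt.apply_gradients(zip(grads, [var]))  # roughly what opt.minimize(loss_fn, [var]) does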
Example #30
Source File: sparse_image_warp_test.py    From addons with Apache License 2.0
def test_that_backprop_runs():
    """Making sure the gradients can be computed."""
    batch_size = 1
    image_height = 9
    image_width = 12
    image = tf.Variable(
        np.random.uniform(size=[batch_size, image_height, image_width, 3]),
        dtype=tf.float32,
    )
    control_point_locations = [[3.0, 3.0]]
    control_point_locations = tf.constant(
        np.float32(np.expand_dims(control_point_locations, 0))
    )
    control_point_displacements = [[0.25, -0.5]]
    control_point_displacements = tf.constant(
        np.float32(np.expand_dims(control_point_displacements, 0))
    )

    with tf.GradientTape() as t:
        warped_image, _ = sparse_image_warp(
            image,
            control_point_locations,
            control_point_locations + control_point_displacements,
            num_boundary_points=3,
        )

    gradients = t.gradient(warped_image, image).numpy()
    assert np.sum(np.abs(gradients)) != 0