Python tensorflow.python.ops.math_ops.rsqrt() Examples
The following are 30 code examples of tensorflow.python.ops.math_ops.rsqrt(), collected from open-source projects; the source file, originating project, and license are listed above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.math_ops.
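For orientation: rsqrt(x) computes the elementwise reciprocal square root, 1/sqrt(x). In current TensorFlow the internal math_ops.rsqrt is exposed publicly as tf.math.rsqrt. A minimal sketch, assuming TensorFlow 2.x with eager execution:

import tensorflow as tf

x = tf.constant([1.0, 4.0, 16.0])
print(tf.math.rsqrt(x))     # [1.0, 0.5, 0.25]
print(1.0 / tf.sqrt(x))     # same values, via explicit division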
Example #1
Source File: utils.py From Transformer-in-generating-dialogue with Apache License 2.0 | 6 votes |
def LRSchedule(global_step, d_model, warmup_steps=4000):
  if global_step is None:
    raise ValueError("global_step is required for learning_rate_schedule.")

  def deal_lr(global_step, d_model, warmup_steps):
    d_model = ops.convert_to_tensor(d_model, dtype=tf.float32)
    dtype = d_model.dtype
    warmup_steps = math_ops.cast(warmup_steps, dtype)

    global_step_recomp = math_ops.cast(global_step, dtype)
    arg1 = math_ops.rsqrt(global_step_recomp)
    arg2 = math_ops.multiply(global_step_recomp,
                             math_ops.pow(warmup_steps, -1.5))

    return math_ops.multiply(math_ops.rsqrt(d_model),
                             math_ops.minimum(arg1, arg2))

  return functools.partial(deal_lr, global_step, d_model, warmup_steps)
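This is the Transformer ("Noam") schedule: lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5), so the rate ramps up linearly during warmup and then decays as rsqrt(step). A self-contained sketch of the same formula using only public TF 2.x ops (the name transformer_lr is ours, not from the project above):

import tensorflow as tf

def transformer_lr(step, d_model=512.0, warmup_steps=4000.0):
  # rsqrt(step) decays after warmup; step * warmup_steps**-1.5 ramps up linearly.
  step = tf.cast(step, tf.float32)
  arg1 = tf.math.rsqrt(step)
  arg2 = step * warmup_steps ** -1.5
  return tf.math.rsqrt(tf.constant(d_model)) * tf.minimum(arg1, arg2)

print(transformer_lr(100.0), transformer_lr(4000.0), transformer_lr(40000.0))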
Example #2
Source File: student_t.py From lambda-packs with MIT License | 6 votes |
def _sample_n(self, n, seed=None):
  # The sampling method comes from the fact that if:
  #   X ~ Normal(0, 1)
  #   Z ~ Chi2(df)
  #   Y = X / sqrt(Z / df)
  # then:
  #   Y ~ StudentT(df).
  shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
  normal_sample = random_ops.random_normal(shape, dtype=self.dtype, seed=seed)
  df = self.df * array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)
  gamma_sample = random_ops.random_gamma(
      [n],
      0.5 * df,
      beta=0.5,
      dtype=self.dtype,
      seed=distribution_util.gen_new_seed(seed, salt="student_t"))
  samples = normal_sample * math_ops.rsqrt(gamma_sample / df)
  return samples * self.scale + self.loc  # Abs(scale) not wanted.
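Note that random_gamma with alpha = df/2 and beta = 1/2 is exactly a Chi2(df) draw, so rsqrt(gamma_sample / df) implements the 1/sqrt(Z/df) factor from the comment. The identity can be sanity-checked numerically; a quick NumPy sketch (values are approximate):

import numpy as np

rng = np.random.default_rng(0)
df = 5.0
x = rng.standard_normal(200_000)
z = rng.chisquare(df, 200_000)
y = x / np.sqrt(z / df)              # samples from StudentT(df)
print(y.var(), df / (df - 2.0))      # sample variance vs. theoretical df/(df-2)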
Example #3
Source File: student_t.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 6 votes |
def _sample_n(self, n, seed=None):
  # The sampling method comes from the fact that if:
  #   X ~ Normal(0, 1)
  #   Z ~ Chi2(df)
  #   Y = X / sqrt(Z / df)
  # then:
  #   Y ~ StudentT(df).
  shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
  normal_sample = random_ops.random_normal(shape, dtype=self.dtype, seed=seed)
  df = self.df * array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)
  gamma_sample = random_ops.random_gamma(
      [n],
      0.5 * df,
      beta=0.5,
      dtype=self.dtype,
      seed=distribution_util.gen_new_seed(seed, salt="student_t"))
  samples = normal_sample * math_ops.rsqrt(gamma_sample / df)
  return samples * self.scale + self.loc  # Abs(scale) not wanted.
Example #4
Source File: nn_grad.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 6 votes |
def _BatchNormWithGlobalNormalizationGrad(op, grad):
  """Return the gradients for the 5 inputs of BatchNormWithGlobalNormalization.

  We do not backprop anything for the mean and var intentionally as they are
  not being trained with backprop in the operation.

  Args:
    op: The BatchNormOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the BatchNormOp.

  Returns:
    dx: Backprop for input, which is (grad * (g * rsqrt(v + epsilon)))
    dm: Backprop for mean, which is
        sum_over_rest(grad * g) * (-1 / rsqrt(v + epsilon))
    dv: Backprop for variance, which is
        sum_over_rest(grad * g * (x - m)) * (-1/2) * (v + epsilon) ^ (-3/2)
    db: Backprop for beta, which is grad reduced in all except the last
        dimension.
    dg: Backprop for gamma, which is (grad * ((x - m) * rsqrt(v + epsilon)))
  """
  dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad(
      op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[4], grad,
      op.get_attr("variance_epsilon"),
      op.get_attr("scale_after_normalization"))
  return dx, dm, dv, db, dg
Example #5
Source File: layers.py From tf-slim with Apache License 2.0 | 5 votes |
def poincare_normalize(x, axis=1, epsilon=1e-5, name=None):
  """Project into the Poincare ball with norm <= 1.0 - epsilon.

  https://en.wikipedia.org/wiki/Poincare_ball_model

  Used in Poincare Embeddings for Learning Hierarchical Representations
  Maximilian Nickel, Douwe Kiela
  https://arxiv.org/pdf/1705.08039.pdf

  For a 1-D tensor with `axis = 0`, computes

      output = (x * (1 - epsilon)) / ||x||   if ||x|| > 1 - epsilon
               x                             otherwise

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `axis`.

  Args:
    x: A `Tensor`.
    axis: Axis along which to normalize. A scalar or a vector of integers.
    epsilon: A small deviation from the edge of the unit sphere for numerical
      stability.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, 'poincare_normalize', [x]) as name:
    x = ops.convert_to_tensor(x, name='x')
    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keepdims=True)
    x_inv_norm = math_ops.rsqrt(square_sum)
    x_inv_norm = math_ops.minimum((1. - epsilon) * x_inv_norm, 1.)
    return math_ops.multiply(x, x_inv_norm, name=name)
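A small numeric check of the projection, re-expressed with public TF 2.x ops (equivalent to the body above): a vector outside the unit ball is pulled to norm 1 - epsilon, while shorter vectors would pass through unchanged.

import tensorflow as tf

x = tf.constant([[3.0, 4.0]])                       # ||x|| = 5 > 1 - epsilon
epsilon = 1e-5
inv_norm = tf.math.rsqrt(tf.reduce_sum(tf.square(x), axis=1, keepdims=True))
x_proj = x * tf.minimum((1.0 - epsilon) * inv_norm, 1.0)
print(tf.norm(x_proj, axis=1))                      # ~0.99999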
Example #6
Source File: test_forward.py From incubator-tvm with Apache License 2.0 | 5 votes |
def _test_rsqrt(data):
  """One iteration of rsqrt"""
  return _test_unary_elemwise(math_ops.rsqrt, data)


#######################################################################
# Neg
# ---
Example #7
Source File: clip_ops.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
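clip_by_average_norm was deprecated and is absent from the TF 2.x public API, but the same computation is easy to write with public ops. A hedged sketch: for t = [3, 4] (L2 norm 5, two elements, so average norm 2.5) and clip_norm = 1, the output's average norm lands exactly at clip_norm.

import tensorflow as tf

t = tf.constant([3.0, 4.0])
clip_norm = 1.0
n_element = tf.cast(tf.size(t), tf.float32)
l2norm_inv = tf.math.rsqrt(tf.reduce_sum(t * t))        # 1/5
clipped = t * clip_norm * tf.minimum(l2norm_inv * n_element, 1.0 / clip_norm)
print(tf.norm(clipped) / n_element)                     # 1.0 == clip_norm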
Example #8
Source File: gdn.py From pcc_geo_cnn with MIT License | 5 votes |
def call(self, inputs):
  inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
  ndim = self._input_rank

  if self.rectify:
    inputs = nn.relu(inputs)

  # Compute normalization pool.
  if ndim == 2:
    norm_pool = math_ops.matmul(math_ops.square(inputs), self.gamma)
    norm_pool = nn.bias_add(norm_pool, self.beta)
  elif self.data_format == "channels_last" and ndim <= 5:
    shape = self.gamma.shape.as_list()
    gamma = array_ops.reshape(self.gamma, (ndim - 2) * [1] + shape)
    norm_pool = nn.convolution(math_ops.square(inputs), gamma, "VALID")
    norm_pool = nn.bias_add(norm_pool, self.beta)
  else:  # generic implementation
    # This puts channels in the last dimension regardless of input.
    norm_pool = math_ops.tensordot(
        math_ops.square(inputs), self.gamma, [[self._channel_axis()], [0]])
    norm_pool += self.beta
    if self.data_format == "channels_first":
      # Return to channels_first format if necessary.
      axes = list(range(ndim - 1))
      axes.insert(1, ndim - 1)
      norm_pool = array_ops.transpose(norm_pool, axes)

  if self.inverse:
    norm_pool = math_ops.sqrt(norm_pool)
  else:
    norm_pool = math_ops.rsqrt(norm_pool)
  outputs = inputs * norm_pool

  if not context.executing_eagerly():
    outputs.set_shape(self.compute_output_shape(inputs.shape))
  return outputs
Example #9
Source File: image_ops_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  image = control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
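The public counterpart is tf.image.per_image_standardization. A usage sketch, assuming TF 2.x: the output has approximately zero mean and unit standard deviation, unless the 1/sqrt(N) stddev floor kicks in for a near-uniform image.

import tensorflow as tf

img = tf.random.uniform([8, 8, 3])
std_img = tf.image.per_image_standardization(img)
print(tf.reduce_mean(std_img))       # ~0.0
print(tf.math.reduce_std(std_img))   # ~1.0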
Example #10
Source File: layers.py From tensornets with MIT License | 5 votes |
def poincare_normalize(x, axis=1, epsilon=1e-5, name=None):
  """Project into the Poincare ball with norm <= 1.0 - epsilon.

  https://en.wikipedia.org/wiki/Poincare_ball_model

  Used in Poincare Embeddings for Learning Hierarchical Representations
  Maximilian Nickel, Douwe Kiela
  https://arxiv.org/pdf/1705.08039.pdf

  For a 1-D tensor with `axis = 0`, computes

      output = (x * (1 - epsilon)) / ||x||   if ||x|| > 1 - epsilon
               x                             otherwise

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `axis`.

  Args:
    x: A `Tensor`.
    axis: Axis along which to normalize. A scalar or a vector of integers.
    epsilon: A small deviation from the edge of the unit sphere for numerical
      stability.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, 'poincare_normalize', [x]) as name:
    x = ops.convert_to_tensor(x, name='x')
    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keepdims=True)
    x_inv_norm = math_ops.rsqrt(square_sum)
    x_inv_norm = math_ops.minimum((1. - epsilon) * x_inv_norm, 1.)
    return math_ops.multiply(x, x_inv_norm, name=name)
Example #11
Source File: nn_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.multiply(x, x_inv_norm, name=name)
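In current TensorFlow the same function is tf.math.l2_normalize (the dim argument was renamed axis). A short usage sketch; note how the epsilon floor inside rsqrt keeps an all-zero input from producing NaNs:

import tensorflow as tf

v = tf.constant([3.0, 4.0])
print(tf.math.l2_normalize(v, axis=0))            # [0.6, 0.8], unit L2 norm
print(tf.math.l2_normalize(tf.zeros(2), axis=0))  # [0., 0.], not NaN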
Example #12
Source File: clip_ops.py From deep_image_model with Apache License 2.0 | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #13
Source File: nn.py From deep_image_model with Apache License 2.0 | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.mul(x, x_inv_norm, name=name)
Example #14
Source File: image_ops_impl.py From keras-lambda with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #15
Source File: official_tf_image.py From X-Detector with Apache License 2.0 | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  image = control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #16
Source File: distributed_shampoo.py From lingvo with Apache License 2.0 | 5 votes |
def _apply_dense(self, grad, var):
  # Calculates the preconditioner statistics for each tensor.
  partitioned_grads = TensorPartitioner.partition_tensor(
      grad, self._partition_info)
  shape = var.get_shape()
  fallback_to_diagonal = self._fallback_to_diagonal_for_shape(shape)

  precond_statistics_update = []
  if not fallback_to_diagonal:
    precond_statistics_update = self._updated_statistics(
        var, partitioned_grads)

  accumulator = self.get_slot(var, "accumulator")
  accumulator_updated = state_ops.assign_add(accumulator, grad * grad)
  accumulator_inv_sqrt = math_ops.rsqrt(accumulator_updated + 1e-30)
  if self._momentum > 0.0:
    scaled_g = (1.0 - self._momentum_tensor) * (grad * accumulator_inv_sqrt)
    gbar = self.get_slot(var, "momentum")
    gbar_updated = state_ops.assign_add(
        gbar, gbar * (self._momentum_tensor - 1.0) + scaled_g)
  else:
    gbar_updated = (grad * accumulator_inv_sqrt)

  if not fallback_to_diagonal:
    # Update the preconditioner statistics followed by computing the
    # preconditioned gradient.
    with ops.control_dependencies(precond_statistics_update):
      s = tf.cast(self._run_nondiagonal_update, tf.float32)
      preconditioned_grad = self._preconditioned_update(
          var, partitioned_grads, gbar_updated)
      # slowly adapt from diagonal to preconditioned gradient.
      w = self._run_nondiagonal_update_warmup
      warmup_update = s * self._learning_rate_tensor * (
          w * preconditioned_grad + (1.0 - w) * gbar_updated)
      fallback_update = (1 - s) * (self._learning_rate_tensor * gbar_updated)
      return state_ops.assign_sub(var, warmup_update + fallback_update)
  else:
    return state_ops.assign_sub(var,
                                self._learning_rate_tensor * gbar_updated)
Example #17
Source File: clip_ops.py From lambda-packs with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #18
Source File: image_ops_impl.py From lambda-packs with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  image = control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #19
Source File: dirichlet.py From lambda-packs with MIT License | 5 votes |
def _variance_scale_term(self):
  """Helper to `_covariance` and `_variance` which computes a shared scale."""
  return math_ops.rsqrt(1. + self.total_concentration[..., array_ops.newaxis])
Example #20
Source File: nn_impl.py From keras-lambda with MIT License | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.multiply(x, x_inv_norm, name=name)
Example #21
Source File: nn_impl.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.multiply(x, x_inv_norm, name=name)
Example #22
Source File: clip_ops.py From keras-lambda with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #23
Source File: clip_ops.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #24
Source File: image_ops_impl.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #25
Source File: nn_grad.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 4 votes |
def _BaseFusedBatchNormGrad(op, use_v2, *grad):
  """Return the gradients for the 3 inputs of BatchNorm.

  Args:
    op: The BatchNormOp for which we need to compute gradients.
    use_v2: Boolean indicating whether to use the V2 version of the fused batch
      norm gradient.
    *grad: An argument list for tensors of gradients wrt the outputs with
      grad[0] as grad_y.

  Returns:
    grad_x: gradient for x, which is
            scale * rsqrt(variance + epsilon) *
            [grad_y - mean(grad_y) - (x - mean(x)) *
             mean(grad_y * (x - mean(x))) / (variance + epsilon)]
            in training mode;
            grad_y * scale * rsqrt(pop_variance + epsilon) in freeze mode.
    grad_scale: gradient for scale, which is
                sum(grad_y * (x - mean(x)) * rsqrt(variance + epsilon))
                in training mode;
                sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon))
                in freeze mode.
    grad_offset: gradient for offset, which is sum(grad_y) in training mode;
                 sum(grad_y) in freeze mode.
  """
  x = op.inputs[0]
  grad_y = grad[0]
  scale = op.inputs[1]
  epsilon = op.get_attr("epsilon")
  data_format = op.get_attr("data_format")
  is_training = op.get_attr("is_training")
  grad_fun = (gen_nn_ops.fused_batch_norm_grad_v2 if use_v2
              else gen_nn_ops.fused_batch_norm_grad)
  if is_training:
    return grad_fun(
        grad_y,
        x,
        scale,
        op.outputs[3],
        op.outputs[4],
        epsilon=epsilon,
        data_format=data_format,
        is_training=is_training)
  else:
    pop_mean = op.inputs[3]
    pop_var = op.inputs[4]
    if data_format == b"NCHW":
      x = array_ops.transpose(x, [0, 2, 3, 1])
      grad_y = array_ops.transpose(grad_y, [0, 2, 3, 1])
    dx, dscale, doffset, _, _ = grad_fun(
        grad_y,
        x,
        scale,
        pop_mean,
        pop_var,
        epsilon=epsilon,
        data_format='NHWC',
        is_training=is_training)
    if data_format == b"NCHW":
      dx = array_ops.transpose(dx, [0, 3, 1, 2])
    return dx, dscale, doffset, None, None
Example #26
Source File: nn_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 4 votes |
def batch_normalization(x,
                        mean,
                        variance,
                        offset,
                        scale,
                        variance_epsilon,
                        name=None):
  r"""Batch normalization.

  As described in http://arxiv.org/abs/1502.03167.
  Normalizes a tensor by `mean` and `variance`, and applies (optionally) a
  `scale` \\(\gamma\\) to it, as well as an `offset` \\(\beta\\):

  \\(\frac{\gamma(x-\mu)}{\sigma}+\beta\\)

  `mean`, `variance`, `offset` and `scale` are all expected to be of one of
  two shapes:

    * In all generality, they can have the same number of dimensions as the
      input `x`, with identical sizes as `x` for the dimensions that are not
      normalized over (the 'depth' dimension(s)), and dimension 1 for the
      others which are being normalized over.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=True)` during training, or running
      averages thereof during inference.
    * In the common case where the 'depth' dimension is the last dimension in
      the input tensor `x`, they may be one dimensional tensors of the same
      size as the 'depth' dimension.
      This is the case for example for the common `[batch, depth]` layout of
      fully-connected layers, and `[batch, height, width, depth]` for
      convolutions.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=False)` during training, or running
      averages thereof during inference.

  Args:
    x: Input `Tensor` of arbitrary dimensionality.
    mean: A mean `Tensor`.
    variance: A variance `Tensor`.
    offset: An offset `Tensor`, often denoted \\(\beta\\) in equations, or
      None. If present, will be added to the normalized tensor.
    scale: A scale `Tensor`, often denoted \\(\gamma\\) in equations, or
      `None`. If present, the scale is applied to the normalized tensor.
    variance_epsilon: A small float number to avoid dividing by 0.
    name: A name for this operation (optional).

  Returns:
    the normalized, scaled, offset tensor.
  """
  with ops.name_scope(name, "batchnorm", [x, mean, variance, scale, offset]):
    inv = math_ops.rsqrt(variance + variance_epsilon)
    if scale is not None:
      inv *= scale
    return x * inv + (offset - mean * inv
                      if offset is not None else -mean * inv)
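Note how the body factors the normalization into a single affine transform: y = x * inv + (offset - mean * inv), with inv = scale * rsqrt(variance + variance_epsilon), so the per-element work is one multiply and one add. The public endpoint is tf.nn.batch_normalization; a usage sketch with batch statistics from tf.nn.moments (assuming TF 2.x):

import tensorflow as tf

x = tf.random.normal([32, 16])
mean, variance = tf.nn.moments(x, axes=[0])
y = tf.nn.batch_normalization(x, mean, variance,
                              offset=None, scale=None,
                              variance_epsilon=1e-3)
print(tf.reduce_mean(y), tf.math.reduce_std(y))   # ~0.0, ~1.0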
Example #27
Source File: copy_attention_wrapper.py From question-generation with MIT License | 4 votes |
def _bahdanau_score(processed_query, keys, normalize):
  """Implements Bahdanau-style (additive) scoring function.

  This attention has two forms. The first is Bahdanau attention,
  as described in:

  Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
  "Neural Machine Translation by Jointly Learning to Align and Translate."
  ICLR 2015. https://arxiv.org/abs/1409.0473

  The second is the normalized form. This form is inspired by the
  weight normalization article:

  Tim Salimans, Diederik P. Kingma.
  "Weight Normalization: A Simple Reparameterization to Accelerate
   Training of Deep Neural Networks."
  https://arxiv.org/abs/1602.07868

  To enable the second form, set `normalize=True`.

  Args:
    processed_query: Tensor, shape `[batch_size, num_units]` to compare to
      keys.
    keys: Processed memory, shape `[batch_size, max_time, num_units]`.
    normalize: Whether to normalize the score function.

  Returns:
    A `[batch_size, max_time]` tensor of unnormalized score values.
  """
  dtype = processed_query.dtype
  # Get the number of hidden units from the trailing dimension of keys
  num_units = keys.shape[2].value or array_ops.shape(keys)[2]
  # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
  processed_query = array_ops.expand_dims(processed_query, 1)
  v = variable_scope.get_variable(
      "attention_v", [num_units], dtype=dtype)
  if normalize:
    # Scalar used in weight normalization
    g = variable_scope.get_variable(
        "attention_g", dtype=dtype,
        initializer=math.sqrt((1. / num_units)))
    # Bias added prior to the nonlinearity
    b = variable_scope.get_variable(
        "attention_b", [num_units], dtype=dtype,
        initializer=init_ops.zeros_initializer())
    # normed_v = g * v / ||v||
    normed_v = g * v * math_ops.rsqrt(
        math_ops.reduce_sum(math_ops.square(v)))
    return math_ops.reduce_sum(
        normed_v * math_ops.tanh(keys + processed_query + b), [2])
  else:
    return math_ops.reduce_sum(v * math_ops.tanh(keys + processed_query), [2])
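In the normalized branch, g * v * rsqrt(sum(v^2)) is the weight-normalization reparameterization: the direction comes from v and the norm of normed_v is exactly g. A two-line NumPy check of that identity:

import numpy as np

v, g = np.array([3.0, 4.0]), 2.0
normed_v = g * v / np.sqrt(np.sum(v ** 2))   # same as g * v * rsqrt(sum(v^2))
print(np.linalg.norm(normed_v))              # 2.0 == g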
Example #28
Source File: nn_impl.py From keras-lambda with MIT License | 4 votes |
def batch_normalization(x,
                        mean,
                        variance,
                        offset,
                        scale,
                        variance_epsilon,
                        name=None):
  r"""Batch normalization.

  As described in http://arxiv.org/abs/1502.03167.
  Normalizes a tensor by `mean` and `variance`, and applies (optionally) a
  `scale` \\(\gamma\\) to it, as well as an `offset` \\(\beta\\):

  \\(\frac{\gamma(x-\mu)}{\sigma}+\beta\\)

  `mean`, `variance`, `offset` and `scale` are all expected to be of one of
  two shapes:

    * In all generality, they can have the same number of dimensions as the
      input `x`, with identical sizes as `x` for the dimensions that are not
      normalized over (the 'depth' dimension(s)), and dimension 1 for the
      others which are being normalized over.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=True)` during training, or running
      averages thereof during inference.
    * In the common case where the 'depth' dimension is the last dimension in
      the input tensor `x`, they may be one dimensional tensors of the same
      size as the 'depth' dimension.
      This is the case for example for the common `[batch, depth]` layout of
      fully-connected layers, and `[batch, height, width, depth]` for
      convolutions.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=False)` during training, or running
      averages thereof during inference.

  Args:
    x: Input `Tensor` of arbitrary dimensionality.
    mean: A mean `Tensor`.
    variance: A variance `Tensor`.
    offset: An offset `Tensor`, often denoted \\(\beta\\) in equations, or
      None. If present, will be added to the normalized tensor.
    scale: A scale `Tensor`, often denoted \\(\gamma\\) in equations, or
      `None`. If present, the scale is applied to the normalized tensor.
    variance_epsilon: A small float number to avoid dividing by 0.
    name: A name for this operation (optional).

  Returns:
    the normalized, scaled, offset tensor.
  """
  with ops.name_scope(name, "batchnorm", [x, mean, variance, scale, offset]):
    inv = math_ops.rsqrt(variance + variance_epsilon)
    if scale is not None:
      inv *= scale
    return x * inv + (offset - mean * inv
                      if offset is not None else -mean * inv)
Example #29
Source File: spectral_ops.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 4 votes |
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
  """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

  Currently only Type II is supported. Implemented using a length `2N` padded
  @{tf.spectral.rfft}, as described here: https://dsp.stackexchange.com/a/10606

  @compatibility(scipy)
  Equivalent to scipy.fftpack.dct for the Type-II DCT.
  https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
  @end_compatibility

  Args:
    input: A `[..., samples]` `float32` `Tensor` containing the signals to
      take the DCT of.
    type: The DCT type to perform. Must be 2.
    n: For future expansion. The length of the transform. Must be `None`.
    axis: For future expansion. The axis to compute the DCT along. Must be
      `-1`.
    norm: The normalization to apply. `None` for no normalization or `'ortho'`
      for orthonormal normalization.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `float32` `Tensor` containing the DCT of `input`.

  Raises:
    ValueError: If `type` is not `2`, `n` is not `None`, `axis` is not `-1`,
      or `norm` is not `None` or `'ortho'`.

  [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
  """
  _validate_dct_arguments(type, n, axis, norm)
  with _ops.name_scope(name, "dct", [input]):
    # We use the RFFT to compute the DCT and TensorFlow only supports float32
    # for FFTs at the moment.
    input = _ops.convert_to_tensor(input, dtype=_dtypes.float32)

    axis_dim = input.shape[-1].value or _array_ops.shape(input)[-1]
    axis_dim_float = _math_ops.to_float(axis_dim)
    scale = 2.0 * _math_ops.exp(_math_ops.complex(
        0.0, -_math.pi * _math_ops.range(axis_dim_float) /
        (2.0 * axis_dim_float)))

    # TODO(rjryan): Benchmark performance and memory usage of the various
    # approaches to computing a DCT via the RFFT.
    dct2 = _math_ops.real(
        rfft(input, fft_length=[2 * axis_dim])[..., :axis_dim] * scale)

    if norm == "ortho":
      n1 = 0.5 * _math_ops.rsqrt(axis_dim_float)
      n2 = n1 * _math_ops.sqrt(2.0)
      # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
      weights = _array_ops.pad(
          _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
          constant_values=n2)
      dct2 *= weights

    return dct2
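The docstring promises scipy compatibility; in TF 2.x this function lives at tf.signal.dct, so the claim can be cross-checked directly. A sketch assuming both TensorFlow 2.x and SciPy are installed:

import numpy as np
import tensorflow as tf
from scipy.fftpack import dct as scipy_dct

x = np.random.rand(8).astype(np.float32)
tf_out = tf.signal.dct(tf.constant(x), type=2, norm='ortho')
sp_out = scipy_dct(x, type=2, norm='ortho')
print(np.allclose(tf_out.numpy(), sp_out, atol=1e-5))   # True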
Example #30
Source File: attention_wrapper.py From QGforQA with MIT License | 4 votes |
def _bahdanau_score(processed_query, keys, normalize):
  """Implements Bahdanau-style (additive) scoring function.

  This attention has two forms. The first is Bahdanau attention,
  as described in:

  Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
  "Neural Machine Translation by Jointly Learning to Align and Translate."
  ICLR 2015. https://arxiv.org/abs/1409.0473

  The second is the normalized form. This form is inspired by the
  weight normalization article:

  Tim Salimans, Diederik P. Kingma.
  "Weight Normalization: A Simple Reparameterization to Accelerate
   Training of Deep Neural Networks."
  https://arxiv.org/abs/1602.07868

  To enable the second form, set `normalize=True`.

  Args:
    processed_query: Tensor, shape `[batch_size, num_units]` to compare to
      keys.
    keys: Processed memory, shape `[batch_size, max_time, num_units]`.
    normalize: Whether to normalize the score function.

  Returns:
    A `[batch_size, max_time]` tensor of unnormalized score values.
  """
  dtype = processed_query.dtype
  # Get the number of hidden units from the trailing dimension of keys
  num_units = keys.shape[2].value or array_ops.shape(keys)[2]
  # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
  processed_query = array_ops.expand_dims(processed_query, 1)
  v = variable_scope.get_variable(
      "attention_v", [num_units], dtype=dtype)
  if normalize:
    # Scalar used in weight normalization
    g = variable_scope.get_variable(
        "attention_g", dtype=dtype,
        initializer=math.sqrt((1. / num_units)))
    # Bias added prior to the nonlinearity
    b = variable_scope.get_variable(
        "attention_b", [num_units], dtype=dtype,
        initializer=init_ops.zeros_initializer())
    # normed_v = g * v / ||v||
    normed_v = g * v * math_ops.rsqrt(
        math_ops.reduce_sum(math_ops.square(v)))
    return math_ops.reduce_sum(
        normed_v * math_ops.tanh(keys + processed_query + b), [2])
  else:
    return math_ops.reduce_sum(v * math_ops.tanh(keys + processed_query), [2])