Python tensorflow.python.ops.math_ops.rsqrt() Examples
The following are 30 code examples of tensorflow.python.ops.math_ops.rsqrt(), collected from open-source projects; the source file, originating project, and license are listed above each example. You may also want to check out all available functions/classes of the module tensorflow.python.ops.math_ops.
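For orientation: rsqrt(x) computes the elementwise reciprocal square root, 1/sqrt(x). In current TensorFlow the internal math_ops.rsqrt is exposed publicly as tf.math.rsqrt. A minimal sketch, assuming TensorFlow 2.x with eager execution:

import tensorflow as tf

x = tf.constant([1.0, 4.0, 16.0])
print(tf.math.rsqrt(x))     # [1.0, 0.5, 0.25]
print(1.0 / tf.sqrt(x))     # same values, via explicit division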
Example #1
Source File: utils.py From Transformer-in-generating-dialogue with Apache License 2.0 | 6 votes |
def LRSchedule(global_step, d_model, warmup_steps=4000):
  if global_step is None:
    raise ValueError("global_step is required for learning_rate_schedule.")

  def deal_lr(global_step, d_model, warmup_steps):
    d_model = ops.convert_to_tensor(d_model, dtype=tf.float32)
    dtype = d_model.dtype
    warmup_steps = math_ops.cast(warmup_steps, dtype)

    global_step_recomp = math_ops.cast(global_step, dtype)
    arg1 = math_ops.rsqrt(global_step_recomp)
    arg2 = math_ops.multiply(global_step_recomp,
                             math_ops.pow(warmup_steps, -1.5))

    return math_ops.multiply(math_ops.rsqrt(d_model),
                             math_ops.minimum(arg1, arg2))

  return functools.partial(deal_lr, global_step, d_model, warmup_steps)
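This is the Transformer ("Noam") schedule: lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5), so the rate ramps up linearly during warmup and then decays as rsqrt(step). A self-contained sketch of the same formula using only public TF 2.x ops (the name transformer_lr is ours, not from the project above):

import tensorflow as tf

def transformer_lr(step, d_model=512.0, warmup_steps=4000.0):
  # rsqrt(step) decays after warmup; step * warmup_steps**-1.5 ramps up linearly.
  step = tf.cast(step, tf.float32)
  arg1 = tf.math.rsqrt(step)
  arg2 = step * warmup_steps ** -1.5
  return tf.math.rsqrt(tf.constant(d_model)) * tf.minimum(arg1, arg2)

print(transformer_lr(100.0), transformer_lr(4000.0), transformer_lr(40000.0))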
Example #2
Source File: student_t.py From lambda-packs with MIT License | 6 votes |
def _sample_n(self, n, seed=None):
  # The sampling method comes from the fact that if:
  #   X ~ Normal(0, 1)
  #   Z ~ Chi2(df)
  #   Y = X / sqrt(Z / df)
  # then:
  #   Y ~ StudentT(df).
  shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
  normal_sample = random_ops.random_normal(shape, dtype=self.dtype, seed=seed)
  df = self.df * array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)
  gamma_sample = random_ops.random_gamma(
      [n],
      0.5 * df,
      beta=0.5,
      dtype=self.dtype,
      seed=distribution_util.gen_new_seed(seed, salt="student_t"))
  samples = normal_sample * math_ops.rsqrt(gamma_sample / df)
  return samples * self.scale + self.loc  # Abs(scale) not wanted.
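Note that random_gamma with alpha = df/2 and beta = 1/2 is exactly a Chi2(df) draw, so rsqrt(gamma_sample / df) implements the 1/sqrt(Z/df) factor from the comment. The identity can be sanity-checked numerically; a quick NumPy sketch (values are approximate):

import numpy as np

rng = np.random.default_rng(0)
df = 5.0
x = rng.standard_normal(200_000)
z = rng.chisquare(df, 200_000)
y = x / np.sqrt(z / df)              # samples from StudentT(df)
print(y.var(), df / (df - 2.0))      # sample variance vs. theoretical df/(df-2)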
Example #3
Source File: student_t.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 6 votes |
def _sample_n(self, n, seed=None):
  # The sampling method comes from the fact that if:
  #   X ~ Normal(0, 1)
  #   Z ~ Chi2(df)
  #   Y = X / sqrt(Z / df)
  # then:
  #   Y ~ StudentT(df).
  shape = array_ops.concat([[n], self.batch_shape_tensor()], 0)
  normal_sample = random_ops.random_normal(shape, dtype=self.dtype, seed=seed)
  df = self.df * array_ops.ones(self.batch_shape_tensor(), dtype=self.dtype)
  gamma_sample = random_ops.random_gamma(
      [n],
      0.5 * df,
      beta=0.5,
      dtype=self.dtype,
      seed=distribution_util.gen_new_seed(seed, salt="student_t"))
  samples = normal_sample * math_ops.rsqrt(gamma_sample / df)
  return samples * self.scale + self.loc  # Abs(scale) not wanted.
Example #4
Source File: nn_grad.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 6 votes |
def _BatchNormWithGlobalNormalizationGrad(op, grad):
  """Return the gradients for the 5 inputs of BatchNormWithGlobalNormalization.

  We do not backprop anything for the mean and var intentionally as they are
  not being trained with backprop in the operation.

  Args:
    op: The BatchNormOp for which we need to generate gradients.
    grad: Tensor. The gradients passed to the BatchNormOp.

  Returns:
    dx: Backprop for input, which is (grad * (g * rsqrt(v + epsilon)))
    dm: Backprop for mean, which is
        sum_over_rest(grad * g) * (-1 / rsqrt(v + epsilon))
    dv: Backprop for variance, which is
        sum_over_rest(grad * g * (x - m)) * (-1/2) * (v + epsilon) ^ (-3/2)
    db: Backprop for beta, which is grad reduced in all except the last
        dimension.
    dg: Backprop for gamma, which is (grad * ((x - m) * rsqrt(v + epsilon)))
  """
  dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad(
      op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[4], grad,
      op.get_attr("variance_epsilon"),
      op.get_attr("scale_after_normalization"))
  return dx, dm, dv, db, dg
Example #5
Source File: layers.py From tf-slim with Apache License 2.0 | 5 votes |
def poincare_normalize(x, axis=1, epsilon=1e-5, name=None):
  """Project into the Poincare ball with norm <= 1.0 - epsilon.

  https://en.wikipedia.org/wiki/Poincare_ball_model

  Used in Poincare Embeddings for Learning Hierarchical Representations
  Maximilian Nickel, Douwe Kiela
  https://arxiv.org/pdf/1705.08039.pdf

  For a 1-D tensor with `axis = 0`, computes

      output = (x * (1 - epsilon)) / ||x||   if ||x|| > 1 - epsilon
               x                             otherwise

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `axis`.

  Args:
    x: A `Tensor`.
    axis: Axis along which to normalize. A scalar or a vector of integers.
    epsilon: A small deviation from the edge of the unit sphere for numerical
      stability.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, 'poincare_normalize', [x]) as name:
    x = ops.convert_to_tensor(x, name='x')
    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keepdims=True)
    x_inv_norm = math_ops.rsqrt(square_sum)
    x_inv_norm = math_ops.minimum((1. - epsilon) * x_inv_norm, 1.)
    return math_ops.multiply(x, x_inv_norm, name=name)
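A small numeric check of the projection, re-expressed with public TF 2.x ops (equivalent to the body above): a vector outside the unit ball is pulled to norm 1 - epsilon, while shorter vectors would pass through unchanged.

import tensorflow as tf

x = tf.constant([[3.0, 4.0]])                       # ||x|| = 5 > 1 - epsilon
epsilon = 1e-5
inv_norm = tf.math.rsqrt(tf.reduce_sum(tf.square(x), axis=1, keepdims=True))
x_proj = x * tf.minimum((1.0 - epsilon) * inv_norm, 1.0)
print(tf.norm(x_proj, axis=1))                      # ~0.99999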
Example #6
Source File: test_forward.py From incubator-tvm with Apache License 2.0 | 5 votes |
def _test_rsqrt(data):
  """One iteration of rsqrt"""
  return _test_unary_elemwise(math_ops.rsqrt, data)


#######################################################################
# Neg
# ---
Example #7
Source File: clip_ops.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
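clip_by_average_norm was deprecated and is absent from the TF 2.x public API, but the same computation is easy to write with public ops. A hedged sketch: for t = [3, 4] (L2 norm 5, two elements, so average norm 2.5) and clip_norm = 1, the output's average norm lands exactly at clip_norm.

import tensorflow as tf

t = tf.constant([3.0, 4.0])
clip_norm = 1.0
n_element = tf.cast(tf.size(t), tf.float32)
l2norm_inv = tf.math.rsqrt(tf.reduce_sum(t * t))        # 1/5
clipped = t * clip_norm * tf.minimum(l2norm_inv * n_element, 1.0 / clip_norm)
print(tf.norm(clipped) / n_element)                     # 1.0 == clip_norm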
Example #8
Source File: gdn.py From pcc_geo_cnn with MIT License | 5 votes |
def call(self, inputs):
  inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
  ndim = self._input_rank

  if self.rectify:
    inputs = nn.relu(inputs)

  # Compute normalization pool.
  if ndim == 2:
    norm_pool = math_ops.matmul(math_ops.square(inputs), self.gamma)
    norm_pool = nn.bias_add(norm_pool, self.beta)
  elif self.data_format == "channels_last" and ndim <= 5:
    shape = self.gamma.shape.as_list()
    gamma = array_ops.reshape(self.gamma, (ndim - 2) * [1] + shape)
    norm_pool = nn.convolution(math_ops.square(inputs), gamma, "VALID")
    norm_pool = nn.bias_add(norm_pool, self.beta)
  else:  # generic implementation
    # This puts channels in the last dimension regardless of input.
    norm_pool = math_ops.tensordot(
        math_ops.square(inputs), self.gamma, [[self._channel_axis()], [0]])
    norm_pool += self.beta
    if self.data_format == "channels_first":
      # Return to channels_first format if necessary.
      axes = list(range(ndim - 1))
      axes.insert(1, ndim - 1)
      norm_pool = array_ops.transpose(norm_pool, axes)

  if self.inverse:
    norm_pool = math_ops.sqrt(norm_pool)
  else:
    norm_pool = math_ops.rsqrt(norm_pool)
  outputs = inputs * norm_pool

  if not context.executing_eagerly():
    outputs.set_shape(self.compute_output_shape(inputs.shape))
  return outputs
Example #9
Source File: image_ops_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  image = control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
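The public counterpart is tf.image.per_image_standardization. A usage sketch, assuming TF 2.x: the output has approximately zero mean and unit standard deviation, unless the 1/sqrt(N) stddev floor kicks in for a near-uniform image.

import tensorflow as tf

img = tf.random.uniform([8, 8, 3])
std_img = tf.image.per_image_standardization(img)
print(tf.reduce_mean(std_img))       # ~0.0
print(tf.math.reduce_std(std_img))   # ~1.0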
Example #10
Source File: layers.py From tensornets with MIT License | 5 votes |
def poincare_normalize(x, axis=1, epsilon=1e-5, name=None):
  """Project into the Poincare ball with norm <= 1.0 - epsilon.

  https://en.wikipedia.org/wiki/Poincare_ball_model

  Used in Poincare Embeddings for Learning Hierarchical Representations
  Maximilian Nickel, Douwe Kiela
  https://arxiv.org/pdf/1705.08039.pdf

  For a 1-D tensor with `axis = 0`, computes

      output = (x * (1 - epsilon)) / ||x||   if ||x|| > 1 - epsilon
               x                             otherwise

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `axis`.

  Args:
    x: A `Tensor`.
    axis: Axis along which to normalize. A scalar or a vector of integers.
    epsilon: A small deviation from the edge of the unit sphere for numerical
      stability.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, 'poincare_normalize', [x]) as name:
    x = ops.convert_to_tensor(x, name='x')
    square_sum = math_ops.reduce_sum(math_ops.square(x), axis, keepdims=True)
    x_inv_norm = math_ops.rsqrt(square_sum)
    x_inv_norm = math_ops.minimum((1. - epsilon) * x_inv_norm, 1.)
    return math_ops.multiply(x, x_inv_norm, name=name)
Example #11
Source File: nn_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.multiply(x, x_inv_norm, name=name)
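In current TensorFlow the same function is tf.math.l2_normalize (the dim argument was renamed axis). A short usage sketch; note how the epsilon floor inside rsqrt keeps an all-zero input from producing NaNs:

import tensorflow as tf

v = tf.constant([3.0, 4.0])
print(tf.math.l2_normalize(v, axis=0))            # [0.6, 0.8], unit L2 norm
print(tf.math.l2_normalize(tf.zeros(2), axis=0))  # [0., 0.], not NaN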
Example #12
Source File: clip_ops.py From deep_image_model with Apache License 2.0 | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #13
Source File: nn.py From deep_image_model with Apache License 2.0 | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.mul(x, x_inv_norm, name=name)
Example #14
Source File: image_ops_impl.py From keras-lambda with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #15
Source File: official_tf_image.py From X-Detector with Apache License 2.0 | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  image = control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #16
Source File: distributed_shampoo.py From lingvo with Apache License 2.0 | 5 votes |
def _apply_dense(self, grad, var):
  # Calculates the preconditioner statistics for each tensor.
  partitioned_grads = TensorPartitioner.partition_tensor(
      grad, self._partition_info)
  shape = var.get_shape()
  fallback_to_diagonal = self._fallback_to_diagonal_for_shape(shape)

  precond_statistics_update = []
  if not fallback_to_diagonal:
    precond_statistics_update = self._updated_statistics(
        var, partitioned_grads)

  accumulator = self.get_slot(var, "accumulator")
  accumulator_updated = state_ops.assign_add(accumulator, grad * grad)
  accumulator_inv_sqrt = math_ops.rsqrt(accumulator_updated + 1e-30)
  if self._momentum > 0.0:
    scaled_g = (1.0 - self._momentum_tensor) * (grad * accumulator_inv_sqrt)
    gbar = self.get_slot(var, "momentum")
    gbar_updated = state_ops.assign_add(
        gbar, gbar * (self._momentum_tensor - 1.0) + scaled_g)
  else:
    gbar_updated = (grad * accumulator_inv_sqrt)

  if not fallback_to_diagonal:
    # Update the preconditioner statistics followed by computing the
    # preconditioned gradient.
    with ops.control_dependencies(precond_statistics_update):
      s = tf.cast(self._run_nondiagonal_update, tf.float32)
      preconditioned_grad = self._preconditioned_update(
          var, partitioned_grads, gbar_updated)
      # slowly adapt from diagonal to preconditioned gradient.
      w = self._run_nondiagonal_update_warmup
      warmup_update = s * self._learning_rate_tensor * (
          w * preconditioned_grad + (1.0 - w) * gbar_updated)
      fallback_update = (1 - s) * (self._learning_rate_tensor * gbar_updated)
      return state_ops.assign_sub(var, warmup_update + fallback_update)
  else:
    return state_ops.assign_sub(var,
                                self._learning_rate_tensor * gbar_updated)
Example #17
Source File: clip_ops.py From lambda-packs with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #18
Source File: image_ops_impl.py From lambda-packs with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  image = control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #19
Source File: dirichlet.py From lambda-packs with MIT License | 5 votes |
def _variance_scale_term(self):
  """Helper to `_covariance` and `_variance` which computes a shared scale."""
  return math_ops.rsqrt(1. + self.total_concentration[..., array_ops.newaxis])
Example #20
Source File: nn_impl.py From keras-lambda with MIT License | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.multiply(x, x_inv_norm, name=name)
Example #21
Source File: nn_impl.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def l2_normalize(x, dim, epsilon=1e-12, name=None):
  """Normalizes along dimension `dim` using an L2 norm.

  For a 1-D tensor with `dim = 0`, computes

      output = x / sqrt(max(sum(x**2), epsilon))

  For `x` with more dimensions, independently normalizes each 1-D slice along
  dimension `dim`.

  Args:
    x: A `Tensor`.
    dim: Dimension along which to normalize. A scalar or a vector of integers.
    epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
      divisor if `norm < sqrt(epsilon)`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same shape as `x`.
  """
  with ops.name_scope(name, "l2_normalize", [x]) as name:
    x = ops.convert_to_tensor(x, name="x")
    square_sum = math_ops.reduce_sum(math_ops.square(x), dim, keep_dims=True)
    x_inv_norm = math_ops.rsqrt(math_ops.maximum(square_sum, epsilon))
    return math_ops.multiply(x, x_inv_norm, name=name)
Example #22
Source File: clip_ops.py From keras-lambda with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #23
Source File: clip_ops.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip
Example #24
Source File: image_ops_impl.py From auto-alt-text-lambda-api with MIT License | 5 votes |
def per_image_standardization(image):
  """Linearly scales `image` to have zero mean and unit norm.

  This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average
  of all values in image, and
  `adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`.

  `stddev` is the standard deviation of all values in `image`. It is capped
  away from zero to protect against division by 0 when handling uniform images.

  Args:
    image: 3-D tensor of shape `[height, width, channels]`.

  Returns:
    The standardized image with same shape as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  image = ops.convert_to_tensor(image, name='image')
  _Check3DImage(image, require_static=False)
  num_pixels = math_ops.reduce_prod(array_ops.shape(image))

  image = math_ops.cast(image, dtype=dtypes.float32)
  image_mean = math_ops.reduce_mean(image)

  variance = (math_ops.reduce_mean(math_ops.square(image)) -
              math_ops.square(image_mean))
  variance = gen_nn_ops.relu(variance)
  stddev = math_ops.sqrt(variance)

  # Apply a minimum normalization that protects us against uniform images.
  min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
  pixel_value_scale = math_ops.maximum(stddev, min_stddev)
  pixel_value_offset = image_mean

  image = math_ops.subtract(image, pixel_value_offset)
  image = math_ops.div(image, pixel_value_scale)
  return image
Example #25
Source File: nn_grad.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 4 votes |
def _BaseFusedBatchNormGrad(op, use_v2, *grad):
  """Return the gradients for the 3 inputs of BatchNorm.

  Args:
    op: The BatchNormOp for which we need to compute gradients.
    use_v2: Boolean indicating whether to use the V2 version of the fused batch
      norm gradient.
    *grad: An argument list for tensors of gradients wrt the outputs with
      grad[0] as grad_y.

  Returns:
    grad_x: gradient for x, which is
            scale * rsqrt(variance + epsilon) *
            [grad_y - mean(grad_y) - (x - mean(x)) *
             mean(grad_y * (x - mean(x))) / (variance + epsilon)]
            in training mode;
            grad_y * scale * rsqrt(pop_variance + epsilon) in freeze mode.
    grad_scale: gradient for scale, which is
                sum(grad_y * (x - mean(x)) * rsqrt(variance + epsilon))
                in training mode;
                sum(grad_y * (x - pop_mean) * rsqrt(pop_variance + epsilon))
                in freeze mode.
    grad_offset: gradient for offset, which is sum(grad_y) in training mode;
                 sum(grad_y) in freeze mode.
  """
  x = op.inputs[0]
  grad_y = grad[0]
  scale = op.inputs[1]
  epsilon = op.get_attr("epsilon")
  data_format = op.get_attr("data_format")
  is_training = op.get_attr("is_training")
  grad_fun = (gen_nn_ops.fused_batch_norm_grad_v2 if use_v2
              else gen_nn_ops.fused_batch_norm_grad)
  if is_training:
    return grad_fun(
        grad_y,
        x,
        scale,
        op.outputs[3],
        op.outputs[4],
        epsilon=epsilon,
        data_format=data_format,
        is_training=is_training)
  else:
    pop_mean = op.inputs[3]
    pop_var = op.inputs[4]
    if data_format == b"NCHW":
      x = array_ops.transpose(x, [0, 2, 3, 1])
      grad_y = array_ops.transpose(grad_y, [0, 2, 3, 1])
    dx, dscale, doffset, _, _ = grad_fun(
        grad_y,
        x,
        scale,
        pop_mean,
        pop_var,
        epsilon=epsilon,
        data_format='NHWC',
        is_training=is_training)
    if data_format == b"NCHW":
      dx = array_ops.transpose(dx, [0, 3, 1, 2])
    return dx, dscale, doffset, None, None
Example #26
Source File: nn_impl.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 4 votes |
def batch_normalization(x,
                        mean,
                        variance,
                        offset,
                        scale,
                        variance_epsilon,
                        name=None):
  r"""Batch normalization.

  As described in http://arxiv.org/abs/1502.03167.
  Normalizes a tensor by `mean` and `variance`, and applies (optionally) a
  `scale` \\(\gamma\\) to it, as well as an `offset` \\(\beta\\):

  \\(\frac{\gamma(x-\mu)}{\sigma}+\beta\\)

  `mean`, `variance`, `offset` and `scale` are all expected to be of one of
  two shapes:

    * In all generality, they can have the same number of dimensions as the
      input `x`, with identical sizes as `x` for the dimensions that are not
      normalized over (the 'depth' dimension(s)), and dimension 1 for the
      others which are being normalized over.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=True)` during training, or running
      averages thereof during inference.
    * In the common case where the 'depth' dimension is the last dimension in
      the input tensor `x`, they may be one dimensional tensors of the same
      size as the 'depth' dimension.
      This is the case for example for the common `[batch, depth]` layout of
      fully-connected layers, and `[batch, height, width, depth]` for
      convolutions.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=False)` during training, or running
      averages thereof during inference.

  Args:
    x: Input `Tensor` of arbitrary dimensionality.
    mean: A mean `Tensor`.
    variance: A variance `Tensor`.
    offset: An offset `Tensor`, often denoted \\(\beta\\) in equations, or
      None. If present, will be added to the normalized tensor.
    scale: A scale `Tensor`, often denoted \\(\gamma\\) in equations, or
      `None`. If present, the scale is applied to the normalized tensor.
    variance_epsilon: A small float number to avoid dividing by 0.
    name: A name for this operation (optional).

  Returns:
    the normalized, scaled, offset tensor.
  """
  with ops.name_scope(name, "batchnorm", [x, mean, variance, scale, offset]):
    inv = math_ops.rsqrt(variance + variance_epsilon)
    if scale is not None:
      inv *= scale
    return x * inv + (offset - mean * inv
                      if offset is not None else -mean * inv)
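Note how the body factors the normalization into a single affine transform: y = x * inv + (offset - mean * inv), with inv = scale * rsqrt(variance + variance_epsilon), so the per-element work is one multiply and one add. The public endpoint is tf.nn.batch_normalization; a usage sketch with batch statistics from tf.nn.moments (assuming TF 2.x):

import tensorflow as tf

x = tf.random.normal([32, 16])
mean, variance = tf.nn.moments(x, axes=[0])
y = tf.nn.batch_normalization(x, mean, variance,
                              offset=None, scale=None,
                              variance_epsilon=1e-3)
print(tf.reduce_mean(y), tf.math.reduce_std(y))   # ~0.0, ~1.0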
Example #27
Source File: copy_attention_wrapper.py From question-generation with MIT License | 4 votes |
def _bahdanau_score(processed_query, keys, normalize):
  """Implements Bahdanau-style (additive) scoring function.

  This attention has two forms. The first is Bahdanau attention,
  as described in:

  Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
  "Neural Machine Translation by Jointly Learning to Align and Translate."
  ICLR 2015. https://arxiv.org/abs/1409.0473

  The second is the normalized form. This form is inspired by the
  weight normalization article:

  Tim Salimans, Diederik P. Kingma.
  "Weight Normalization: A Simple Reparameterization to Accelerate
   Training of Deep Neural Networks."
  https://arxiv.org/abs/1602.07868

  To enable the second form, set `normalize=True`.

  Args:
    processed_query: Tensor, shape `[batch_size, num_units]` to compare to
      keys.
    keys: Processed memory, shape `[batch_size, max_time, num_units]`.
    normalize: Whether to normalize the score function.

  Returns:
    A `[batch_size, max_time]` tensor of unnormalized score values.
  """
  dtype = processed_query.dtype
  # Get the number of hidden units from the trailing dimension of keys
  num_units = keys.shape[2].value or array_ops.shape(keys)[2]
  # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
  processed_query = array_ops.expand_dims(processed_query, 1)
  v = variable_scope.get_variable(
      "attention_v", [num_units], dtype=dtype)
  if normalize:
    # Scalar used in weight normalization
    g = variable_scope.get_variable(
        "attention_g", dtype=dtype,
        initializer=math.sqrt((1. / num_units)))
    # Bias added prior to the nonlinearity
    b = variable_scope.get_variable(
        "attention_b", [num_units], dtype=dtype,
        initializer=init_ops.zeros_initializer())
    # normed_v = g * v / ||v||
    normed_v = g * v * math_ops.rsqrt(
        math_ops.reduce_sum(math_ops.square(v)))
    return math_ops.reduce_sum(
        normed_v * math_ops.tanh(keys + processed_query + b), [2])
  else:
    return math_ops.reduce_sum(v * math_ops.tanh(keys + processed_query), [2])
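In the normalized branch, g * v * rsqrt(sum(v^2)) is the weight-normalization reparameterization: the direction comes from v and the norm of normed_v is exactly g. A two-line NumPy check of that identity:

import numpy as np

v, g = np.array([3.0, 4.0]), 2.0
normed_v = g * v / np.sqrt(np.sum(v ** 2))   # same as g * v * rsqrt(sum(v^2))
print(np.linalg.norm(normed_v))              # 2.0 == g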
Example #28
Source File: nn_impl.py From keras-lambda with MIT License | 4 votes |
def batch_normalization(x,
                        mean,
                        variance,
                        offset,
                        scale,
                        variance_epsilon,
                        name=None):
  r"""Batch normalization.

  As described in http://arxiv.org/abs/1502.03167.
  Normalizes a tensor by `mean` and `variance`, and applies (optionally) a
  `scale` \\(\gamma\\) to it, as well as an `offset` \\(\beta\\):

  \\(\frac{\gamma(x-\mu)}{\sigma}+\beta\\)

  `mean`, `variance`, `offset` and `scale` are all expected to be of one of
  two shapes:

    * In all generality, they can have the same number of dimensions as the
      input `x`, with identical sizes as `x` for the dimensions that are not
      normalized over (the 'depth' dimension(s)), and dimension 1 for the
      others which are being normalized over.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=True)` during training, or running
      averages thereof during inference.
    * In the common case where the 'depth' dimension is the last dimension in
      the input tensor `x`, they may be one dimensional tensors of the same
      size as the 'depth' dimension.
      This is the case for example for the common `[batch, depth]` layout of
      fully-connected layers, and `[batch, height, width, depth]` for
      convolutions.
      `mean` and `variance` in this case would typically be the outputs of
      `tf.nn.moments(..., keep_dims=False)` during training, or running
      averages thereof during inference.

  Args:
    x: Input `Tensor` of arbitrary dimensionality.
    mean: A mean `Tensor`.
    variance: A variance `Tensor`.
    offset: An offset `Tensor`, often denoted \\(\beta\\) in equations, or
      None. If present, will be added to the normalized tensor.
    scale: A scale `Tensor`, often denoted \\(\gamma\\) in equations, or
      `None`. If present, the scale is applied to the normalized tensor.
    variance_epsilon: A small float number to avoid dividing by 0.
    name: A name for this operation (optional).

  Returns:
    the normalized, scaled, offset tensor.
  """
  with ops.name_scope(name, "batchnorm", [x, mean, variance, scale, offset]):
    inv = math_ops.rsqrt(variance + variance_epsilon)
    if scale is not None:
      inv *= scale
    return x * inv + (offset - mean * inv
                      if offset is not None else -mean * inv)
Example #29
Source File: spectral_ops.py From Serverless-Deep-Learning-with-TensorFlow-and-AWS-Lambda with MIT License | 4 votes |
def dct(input, type=2, n=None, axis=-1, norm=None, name=None):  # pylint: disable=redefined-builtin
  """Computes the 1D [Discrete Cosine Transform (DCT)][dct] of `input`.

  Currently only Type II is supported. Implemented using a length `2N` padded
  @{tf.spectral.rfft}, as described here: https://dsp.stackexchange.com/a/10606

  @compatibility(scipy)
  Equivalent to scipy.fftpack.dct for the Type-II DCT.
  https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
  @end_compatibility

  Args:
    input: A `[..., samples]` `float32` `Tensor` containing the signals to
      take the DCT of.
    type: The DCT type to perform. Must be 2.
    n: For future expansion. The length of the transform. Must be `None`.
    axis: For future expansion. The axis to compute the DCT along. Must be
      `-1`.
    norm: The normalization to apply. `None` for no normalization or `'ortho'`
      for orthonormal normalization.
    name: An optional name for the operation.

  Returns:
    A `[..., samples]` `float32` `Tensor` containing the DCT of `input`.

  Raises:
    ValueError: If `type` is not `2`, `n` is not `None`, `axis` is not `-1`,
      or `norm` is not `None` or `'ortho'`.

  [dct]: https://en.wikipedia.org/wiki/Discrete_cosine_transform
  """
  _validate_dct_arguments(type, n, axis, norm)
  with _ops.name_scope(name, "dct", [input]):
    # We use the RFFT to compute the DCT and TensorFlow only supports float32
    # for FFTs at the moment.
    input = _ops.convert_to_tensor(input, dtype=_dtypes.float32)

    axis_dim = input.shape[-1].value or _array_ops.shape(input)[-1]
    axis_dim_float = _math_ops.to_float(axis_dim)
    scale = 2.0 * _math_ops.exp(_math_ops.complex(
        0.0, -_math.pi * _math_ops.range(axis_dim_float) /
        (2.0 * axis_dim_float)))

    # TODO(rjryan): Benchmark performance and memory usage of the various
    # approaches to computing a DCT via the RFFT.
    dct2 = _math_ops.real(
        rfft(input, fft_length=[2 * axis_dim])[..., :axis_dim] * scale)

    if norm == "ortho":
      n1 = 0.5 * _math_ops.rsqrt(axis_dim_float)
      n2 = n1 * _math_ops.sqrt(2.0)
      # Use tf.pad to make a vector of [n1, n2, n2, n2, ...].
      weights = _array_ops.pad(
          _array_ops.expand_dims(n1, 0), [[0, axis_dim - 1]],
          constant_values=n2)
      dct2 *= weights

    return dct2
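The docstring promises scipy compatibility; in TF 2.x this function lives at tf.signal.dct, so the claim can be cross-checked directly. A sketch assuming both TensorFlow 2.x and SciPy are installed:

import numpy as np
import tensorflow as tf
from scipy.fftpack import dct as scipy_dct

x = np.random.rand(8).astype(np.float32)
tf_out = tf.signal.dct(tf.constant(x), type=2, norm='ortho')
sp_out = scipy_dct(x, type=2, norm='ortho')
print(np.allclose(tf_out.numpy(), sp_out, atol=1e-5))   # True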
Example #30
Source File: attention_wrapper.py From QGforQA with MIT License | 4 votes |
def _bahdanau_score(processed_query, keys, normalize):
  """Implements Bahdanau-style (additive) scoring function.

  This attention has two forms. The first is Bahdanau attention,
  as described in:

  Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
  "Neural Machine Translation by Jointly Learning to Align and Translate."
  ICLR 2015. https://arxiv.org/abs/1409.0473

  The second is the normalized form. This form is inspired by the
  weight normalization article:

  Tim Salimans, Diederik P. Kingma.
  "Weight Normalization: A Simple Reparameterization to Accelerate
   Training of Deep Neural Networks."
  https://arxiv.org/abs/1602.07868

  To enable the second form, set `normalize=True`.

  Args:
    processed_query: Tensor, shape `[batch_size, num_units]` to compare to
      keys.
    keys: Processed memory, shape `[batch_size, max_time, num_units]`.
    normalize: Whether to normalize the score function.

  Returns:
    A `[batch_size, max_time]` tensor of unnormalized score values.
  """
  dtype = processed_query.dtype
  # Get the number of hidden units from the trailing dimension of keys
  num_units = keys.shape[2].value or array_ops.shape(keys)[2]
  # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
  processed_query = array_ops.expand_dims(processed_query, 1)
  v = variable_scope.get_variable(
      "attention_v", [num_units], dtype=dtype)
  if normalize:
    # Scalar used in weight normalization
    g = variable_scope.get_variable(
        "attention_g", dtype=dtype,
        initializer=math.sqrt((1. / num_units)))
    # Bias added prior to the nonlinearity
    b = variable_scope.get_variable(
        "attention_b", [num_units], dtype=dtype,
        initializer=init_ops.zeros_initializer())
    # normed_v = g * v / ||v||
    normed_v = g * v * math_ops.rsqrt(
        math_ops.reduce_sum(math_ops.square(v)))
    return math_ops.reduce_sum(
        normed_v * math_ops.tanh(keys + processed_query + b), [2])
  else:
    return math_ops.reduce_sum(v * math_ops.tanh(keys + processed_query), [2])