Python tensorflow.pow() Examples

The following are 28 code examples of tensorflow.pow(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the tensorflow module, or try the search function.
Example #1
Source File: losses.py    From R2CNN_Faster-RCNN_Tensorflow with MIT License
def _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=1.0):
    '''

    :param bbox_pred: [-1, 4] in RPN. [-1, cls_num+1, 4] or [-1, cls_num+1, 5] in Fast R-CNN
    :param bbox_targets: same shape as bbox_pred
    :param sigma: controls where the loss switches from quadratic to linear (at |diff| = 1 / sigma^2)
    :return: element-wise smooth L1 loss, same shape as bbox_pred
    '''
    sigma_2 = sigma**2

    box_diff = bbox_pred - bbox_targets

    abs_box_diff = tf.abs(box_diff)

    smoothL1_sign = tf.stop_gradient(
        tf.to_float(tf.less(abs_box_diff, 1. / sigma_2)))
    loss_box = tf.pow(box_diff, 2) * (sigma_2 / 2.0) * smoothL1_sign \
               + (abs_box_diff - (0.5 / sigma_2)) * (1.0 - smoothL1_sign)
    return loss_box 
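For reference, this is the standard smooth L1 (Huber-style) penalty: quadratic for |diff| < 1 / sigma^2 and linear beyond that point. A minimal usage sketch, assuming TensorFlow 1.x (the snippet relies on tf.to_float and a Session); tensor values are illustrative:

import tensorflow as tf  # TensorFlow 1.x assumed

pred = tf.constant([[0.1, 0.1, 3.0, 0.0]])
target = tf.constant([[0.0, 0.0, 0.0, 0.0]])
loss = _smooth_l1_loss_base(pred, target, sigma=1.0)
with tf.Session() as sess:
    print(sess.run(loss))  # small diffs quadratic (0.005), large diffs linear (2.5)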
Example #2
Source File: attacks_tf.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def _apply_gradients(self, grads, x, optim_state):
        """Refer to parent class documentation."""
        new_x = [None] * len(x)
        new_optim_state = {
            "t": optim_state["t"] + 1.,
            "m": [None] * len(x),
            "u": [None] * len(x)
        }
        t = new_optim_state["t"]
        for i in xrange(len(x)):
            g = grads[i]
            m_old = optim_state["m"][i]
            u_old = optim_state["u"][i]
            new_optim_state["m"][i] = (
                self._beta1 * m_old + (1. - self._beta1) * g)
            new_optim_state["u"][i] = (
                self._beta2 * u_old + (1. - self._beta2) * g * g)
            m_hat = new_optim_state["m"][i] / (1. - tf.pow(self._beta1, t))
            u_hat = new_optim_state["u"][i] / (1. - tf.pow(self._beta2, t))
            new_x[i] = (
                x[i] - self._lr * m_hat / (tf.sqrt(u_hat) + self._epsilon))
        return new_x, new_optim_state 
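The divisions by (1. - tf.pow(beta, t)) are Adam's bias corrections for the zero-initialized moment estimates. A NumPy sketch of why the correction matters in early steps (values illustrative):

import numpy as np

beta1, g = 0.9, 1.0  # constant gradient, for illustration
m = 0.0
for t in range(1, 4):
    m = beta1 * m + (1 - beta1) * g
    m_hat = m / (1 - beta1 ** t)  # bias-corrected first moment
    print(t, round(m, 4), round(m_hat, 4))
# raw m starts near 0.1; m_hat recovers the true mean 1.0 at every step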
Example #3
Source File: ops.py    From mac-network with Apache License 2.0
def locationPE(h, w, dim, outDim = -1, addBias = True):    
    x = tf.expand_dims(tf.to_float(tf.linspace(-config.locationBias, config.locationBias, w)), axis = -1)
    y = tf.expand_dims(tf.to_float(tf.linspace(-config.locationBias, config.locationBias, h)), axis = -1)
    i = tf.expand_dims(tf.to_float(tf.range(dim)), axis = 0)

    peSinX = tf.sin(x / (tf.pow(10000.0, i / dim)))
    peCosX = tf.cos(x / (tf.pow(10000.0, i / dim)))
    peSinY = tf.sin(y / (tf.pow(10000.0, i / dim)))
    peCosY = tf.cos(y / (tf.pow(10000.0, i / dim)))

    peSinX = tf.tile(tf.expand_dims(peSinX, axis = 0), [h, 1, 1])
    peCosX = tf.tile(tf.expand_dims(peCosX, axis = 0), [h, 1, 1])
    peSinY = tf.tile(tf.expand_dims(peSinY, axis = 1), [1, w, 1])
    peCosY = tf.tile(tf.expand_dims(peCosY, axis = 1), [1, w, 1]) 

    grid = tf.concat([peSinX, peCosX, peSinY, peCosY], axis = -1)
    dim *= 4
    
    if outDim > 0:
        grid = linear(grid, dim, outDim, addBias = addBias, name = "locationPE")
        dim = outDim

    return grid, dim 
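The tf.pow(10000.0, i / dim) denominator is the geometric frequency schedule familiar from Transformer positional encodings: each channel i gets a sinusoid with a different wavelength. A NumPy sketch of the frequency term (names illustrative):

import numpy as np

dim = 8
i = np.arange(dim)
scale = np.power(10000.0, i / dim)       # wavelengths from 1.0 up to ~10000
x = np.linspace(-1.0, 1.0, 5)[:, None]   # positions, as in locationPE
print(np.sin(x / scale).shape)           # (5, 8): one sinusoid per channel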
Example #4
Source File: common_attention.py    From fine-lm with MIT License
def scaled_dot_product_attention_simple(q, k, v, bias, name=None):
  """Scaled dot-product attention. One head. One spatial dimension.

  Args:
    q: a Tensor with shape [batch, length_q, depth_k]
    k: a Tensor with shape [batch, length_kv, depth_k]
    v: a Tensor with shape [batch, length_kv, depth_v]
    bias: optional Tensor broadcastable to [batch, length_q, length_kv]
    name: an optional string

  Returns:
    A Tensor.
  """
  with tf.variable_scope(
      name, default_name="scaled_dot_product_attention_simple"):
    scalar = tf.rsqrt(tf.to_float(common_layers.shape_list(q)[2]))
    logits = tf.matmul(q * scalar, k, transpose_b=True)
    if bias is not None:
      logits += bias
    weights = tf.nn.softmax(logits, name="attention_weights")
    if common_layers.should_generate_summaries():
      tf.summary.image(
          "attention", tf.expand_dims(tf.pow(weights, 0.2), 3), max_outputs=1)
    return tf.matmul(weights, v) 
Example #5
Source File: tacotron.py    From vae_tacotron with MIT License
def add_loss(self, global_step):
    '''Adds loss to the model. Sets "loss" field. initialize must have been called.'''
    with tf.variable_scope('loss') as scope:
      hp = self._hparams
      self.mel_loss = tf.reduce_mean(tf.abs(self.mel_targets - self.mel_outputs))
      l1 = tf.abs(self.linear_targets - self.linear_outputs)
      # Prioritize loss for frequencies under 3000 Hz.
      n_priority_freq = int(3000 / (hp.sample_rate * 0.5) * hp.num_freq)
      self.linear_loss = 0.5 * tf.reduce_mean(l1) + 0.5 * tf.reduce_mean(l1[:,:,0:n_priority_freq])
             
      self.loss = self.mel_loss + self.linear_loss
   
      if hp.use_vae:
          # KL divergence from the prior: -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
          self.ki_loss = -0.5 * tf.reduce_sum(1 + self.log_var - tf.pow(self.mu, 2) - tf.exp(self.log_var))
          vae_loss_weight = vae_weight(global_step)
          self.loss += self.ki_loss * vae_loss_weight 
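The ki_loss term is the closed-form KL divergence between the approximate posterior N(mu, sigma^2) and a standard normal prior. A NumPy sanity check on a single latent dimension (values illustrative):

import numpy as np

mu, log_var = 0.5, np.log(0.25)                      # sigma^2 = 0.25
kl = -0.5 * (1 + log_var - mu**2 - np.exp(log_var))  # closed form
print(kl)  # ~0.4431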
Example #6
Source File: layers.py    From PADME with MIT License
def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
    temp = []
    subspaces = []
    # creates subspaces the same way it was done in AlphaShare
    for input_tensor in inputs:
      subspace_size = int(input_tensor.get_shape()[-1].value / 2)
      subspaces.append(input_tensor[:, :subspace_size])
      subspaces.append(input_tensor[:, subspace_size:])
      product = tf.matmul(tf.transpose(subspaces[0]), subspaces[1])
      subspaces = []
      # calculate squared Frobenius norm
      temp.append(tf.reduce_sum(tf.pow(product, 2)))
    out_tensor = tf.reduce_sum(temp)
    self.out_tensor = out_tensor
    return out_tensor 
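Here tf.reduce_sum(tf.pow(product, 2)) is the squared Frobenius norm of the cross-subspace product. A NumPy check of the identity (illustrative):

import numpy as np

a = np.random.randn(3, 4)
assert np.isclose(np.sum(a**2), np.linalg.norm(a, 'fro')**2)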
Example #7
Source File: losses.py    From CapsLayer with Apache License 2.0
def spread_loss(labels, logits, margin, regularizer=None):
    """
    Args:
        labels: [batch_size, num_label].
        logits: [batch_size, num_label].
        margin: Integer or 1-D Tensor.
        regularizer: if not None, the graph's regularization losses are added to the loss.

    Returns:
        loss: Spread loss.
    """
    a_target = cl.reduce_sum(labels * logits, axis=1, keepdims=True)
    dist = (1 - labels) * margin - (a_target - logits)
    dist = tf.pow(tf.maximum(0., dist), 2)
    loss = tf.reduce_mean(tf.reduce_sum(dist, axis=-1))
    if regularizer is not None:
        regularizer = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss += tf.reduce_mean(regularizer)
    return(loss) 
Example #8
Source File: losses.py    From CapsLayer with Apache License 2.0
def margin_loss(labels,
                logits,
                upper_margin=0.9,
                bottom_margin=0.1,
                downweight=0.5):
    """
    Args:
        labels: [batch_size, num_label].
        logits: [batch_size, num_label].
    """
    positive_selector = tf.cast(tf.less(logits, upper_margin), tf.float32)
    positive_cost = positive_selector * labels * tf.pow(logits - upper_margin, 2)

    negative_selector = tf.cast(tf.greater(logits, bottom_margin), tf.float32)
    negative_cost = negative_selector * (1 - labels) * tf.pow(logits - bottom_margin, 2)
    loss = 0.5 * positive_cost + 0.5 * downweight * negative_cost
    return tf.reduce_mean(tf.reduce_sum(loss, axis=-1)) 
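The cast-based selectors reproduce the max(0, .)^2 terms of the CapsNet margin loss: for positives, cast(logits < m_plus) * (logits - m_plus)^2 equals max(0, m_plus - logits)^2. A NumPy check (illustrative values):

import numpy as np

logits, m_plus = np.array([0.2, 0.95]), 0.9
selector_form = (logits < m_plus) * (logits - m_plus) ** 2
max_form = np.maximum(0.0, m_plus - logits) ** 2
assert np.allclose(selector_form, max_form)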
Example #9
Source File: loss.py    From centernet_tensorflow_wilderface_voc with MIT License
def focal_loss(pred, gt):
  ''' Modified focal loss. Exactly the same as CornerNet.
      Runs faster and costs a little bit more memory
    Arguments:
      pred (batch,h,w,c)
      gt_regr (batch,h,w,c)
  '''
  pos_inds = tf.cast(tf.equal(gt,1.0),dtype=tf.float32)
  neg_inds = 1.0-pos_inds
  neg_weights = tf.pow(1.0 - gt, 4.0)
 
  pred=tf.clip_by_value(pred, 1e-6, 1.0 - 1e-6)
  pos_loss = tf.log(pred) * tf.pow(1.0 - pred, 2.0) * pos_inds
  neg_loss = tf.log(1.0 - pred) * tf.pow(pred, 2.0) * neg_weights * neg_inds

  num_pos  = tf.reduce_sum(pos_inds)
  pos_loss = tf.reduce_sum(pos_loss)
  neg_loss = tf.reduce_sum(neg_loss)

  loss = - (pos_loss + neg_loss) / num_pos
  return loss
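The tf.pow(1.0 - gt, 4.0) term is CornerNet's penalty reduction around ground-truth peaks, and the clipping keeps tf.log away from zero. A toy NumPy evaluation of the two branches (values illustrative):

import numpy as np

gt, pred = np.array([1.0, 0.6]), np.array([0.9, 0.3])
pos = (gt == 1.0).astype(float)
neg_w = (1.0 - gt) ** 4                              # down-weights near-peak negatives
pos_loss = np.log(pred) * (1.0 - pred) ** 2 * pos
neg_loss = np.log(1.0 - pred) * pred ** 2 * neg_w * (1.0 - pos)
print(pos_loss.sum(), neg_loss.sum())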
Example #10
Source File: hmc.py    From zhusuan with MIT License
def tune(self, acceptance_rate, fresh_start):
        def adapt_stepsize():
            new_step = tf.assign(self.step, (1 - fresh_start) * self.step + 1)
            rate1 = 1.0 / (new_step + self.t0)
            new_h_bar = tf.assign(
                self.h_bar, (1 - fresh_start) * (1 - rate1) * self.h_bar +
                rate1 * (self.delta - acceptance_rate))
            log_epsilon = self.mu - tf.sqrt(new_step) / self.gamma * new_h_bar
            rate = tf.pow(new_step, -self.kappa)
            new_log_epsilon_bar = tf.assign(
                self.log_epsilon_bar,
                rate * log_epsilon + (1 - fresh_start) * (1 - rate) *
                self.log_epsilon_bar)
            with tf.control_dependencies([new_log_epsilon_bar]):
                new_log_epsilon = tf.identity(log_epsilon)

            return tf.exp(new_log_epsilon)

        c = tf.cond(self.adapt_step_size,
                    adapt_stepsize,
                    lambda: tf.exp(self.log_epsilon_bar))

        return c 
Example #11
Source File: hmc.py    From zhusuan with MIT License
def update(self, x):
        # x: (chain_dims data_dims)
        new_t = tf.assign(self.t, self.t + 1)
        weight = (1 - self.decay) / (1 - tf.pow(self.decay, new_t))
        # incr: (chain_dims data_dims)
        incr = [weight * (q - mean) for q, mean in zip(x, self.mean)]
        # mean: (1,...,1 data_dims)
        update_mean = [mean.assign_add(
            tf.reduce_mean(i, axis=self.chain_axes, keepdims=True))
            for mean, i in zip(self.mean, incr)]
        # var: (1,...,1 data_dims)
        new_var = [
            (1 - weight) * var +
            tf.reduce_mean(i * (q - mean), axis=self.chain_axes,
                           keepdims=True)
            for var, i, q, mean in zip(self.var, incr, x, update_mean)]

        update_var = [tf.assign(var, n_var)
                      for var, n_var in zip(self.var, new_var)]
        return update_var 
Example #12
Source File: layers.py    From aboleth with Apache License 2.0
def __init__(self, n_features, lenscale=None, p=1, variational=False,
                 learn_lenscale=False):
        """Create an instance of an arc cosine kernel layer."""
        # Setup random weights
        if variational:
            kern = RBFVariational(lenscale=lenscale,
                                  learn_lenscale=learn_lenscale)
        else:
            kern = RBF(lenscale=lenscale, learn_lenscale=learn_lenscale)
        super().__init__(n_features=n_features, kernel=kern)

        # Kernel order
        assert isinstance(p, int) and p >= 0
        if p == 0:
            self.pfunc = tf.sign
        elif p == 1:
            self.pfunc = lambda x: x
        else:
            self.pfunc = lambda x: tf.pow(x, p) 
Example #13
Source File: layers.py    From basenji with Apache License 2.0
def call(self, inputs):
    input_shape = tf.shape(inputs)
    batch_size, seq_len = input_shape[0], input_shape[1]

    pos_range = tf.range(-seq_len//2, seq_len//2)
    if self.transform is None:
      pos_feature = pos_range
    elif self.transform == 'abs':
      pos_feature = tf.math.abs(pos_range)
    elif self.transform == 'reversed':
      pos_feature = pos_range[::-1]
    else:
      raise ValueError('Unknown ConcatPosition transform.')

    if self.power != 1:
      pos_feature = tf.pow(pos_feature, self.power)
    pos_feature = tf.expand_dims(pos_feature, axis=0)
    pos_feature = tf.expand_dims(pos_feature, axis=-1)
    pos_feature = tf.tile(pos_feature, [batch_size, 1, 1])
    pos_feature = tf.dtypes.cast(pos_feature, dtype=tf.float32)

    return tf.concat([pos_feature, inputs], axis=-1) 
Example #14
Source File: losses.py    From ros_people_object_detection_tensorflow with Apache License 2.0
def _compute_loss(self,
                    prediction_tensor,
                    target_tensor,
                    weights,
                    class_indices=None):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]
      class_indices: (Optional) A 1-D integer tensor of class indices.
        If provided, computes loss only for the specified class indices.

    Returns:
      loss: a float tensor of shape [batch_size, num_anchors, num_classes]
        representing the value of the loss function.
    """
    weights = tf.expand_dims(weights, 2)
    if class_indices is not None:
      weights *= tf.reshape(
          ops.indices_to_dense_vector(class_indices,
                                      tf.shape(prediction_tensor)[2]),
          [1, 1, -1])
    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
        labels=target_tensor, logits=prediction_tensor))
    prediction_probabilities = tf.sigmoid(prediction_tensor)
    p_t = ((target_tensor * prediction_probabilities) +
           ((1 - target_tensor) * (1 - prediction_probabilities)))
    modulating_factor = 1.0
    if self._gamma:
      modulating_factor = tf.pow(1.0 - p_t, self._gamma)
    alpha_weight_factor = 1.0
    if self._alpha is not None:
      alpha_weight_factor = (target_tensor * self._alpha +
                             (1 - target_tensor) * (1 - self._alpha))
    focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
                                per_entry_cross_ent)
    return focal_cross_entropy_loss * weights 
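Here p_t is the predicted probability of the true class, so tf.pow(1.0 - p_t, gamma) down-weights already well-classified examples. A NumPy look at the modulating factor with gamma = 2.0 (illustrative):

import numpy as np

p_t = np.array([0.2, 0.5, 0.9, 0.99])
print((1.0 - p_t) ** 2.0)  # [0.64, 0.25, 0.01, 0.0001]: easy examples nearly vanish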
Example #15
Source File: network.py    From SSH-TensorFlow with MIT License
def _smooth_l1_loss(self, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
        sigma_2 = sigma ** 2
        box_diff = bbox_pred - bbox_targets
        in_box_diff = bbox_inside_weights * box_diff
        abs_in_box_diff = tf.abs(in_box_diff)
        smoothL1_sign = tf.stop_gradient(tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2)))
        in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
                      + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
        out_loss_box = bbox_outside_weights * in_loss_box
        loss_box = tf.reduce_mean(tf.reduce_sum(
            out_loss_box,
            axis=dim
        ))
        return loss_box 
Example #16
Source File: prioritized_replay.py    From rlgraph with Apache License 2.0
def _graph_fn_update_records(self, indices, update):
        num_records = get_batch_size(indices)
        max_priority = 0.0

        # Update has to be sequential.
        def insert_body(i, max_priority_):
            priority = tf.pow(x=update[i], y=self.alpha)

            sum_insert = self.sum_segment_tree.insert(
                index=indices[i],
                element=priority,
                insert_op=tf.add
            )
            min_insert = self.min_segment_tree.insert(
                index=indices[i],
                element=priority,
                insert_op=tf.minimum
            )
            # Keep track of current max priority element.
            max_priority_ = tf.maximum(x=max_priority_, y=priority)

            with tf.control_dependencies(control_inputs=[tf.group(sum_insert, min_insert)]):
                # TODO: This confuses the auto-return value detector.
                return i + 1, max_priority_

        def cond(i, max_priority_):
            return i < num_records - 1

        _, max_priority = tf.while_loop(
            cond=cond,
            body=insert_body,
            loop_vars=(0, max_priority)
        )

        assignment = self.assign_variable(ref=self.max_priority, value=max_priority)
        with tf.control_dependencies(control_inputs=[assignment]):
            return tf.no_op() 
Example #17
Source File: inputs.py    From cloudml-samples with Apache License 2.0
def process_features(features):
  """ Use to implement custom feature engineering logic.

  Default behaviour is to return the original feature tensors dictionary as-is.

  Args:
      features: {string:tensors} - dictionary of feature tensors
  Returns:
      {string:tensors}: extended feature tensors dictionary
  """

  # examples - given:
  # 'x' and 'y' are two numeric features:
  # 'alpha' and 'beta' are two categorical features

  # # create new features using custom logic
  # features['x_2'] = tf.pow(features['x'],2)
  # features['y_2'] = tf.pow(features['y'], 2)
  # features['xy'] = features['x'] * features['y']
  # features['sin_x'] = tf.sin(features['x'])
  # features['cos_y'] = tf.cos(features['y'])
  # features['log_xy'] = tf.log(features['xy'])
  # features['sqrt_xy'] = tf.sqrt(features['xy'])

  # # add created features to metadata (if not already defined in metadata.py)
  # NUMERIC_FEATURE_NAMES_WITH_STATS['x_2'] = None
  # NUMERIC_FEATURE_NAMES_WITH_STATS['y_2'] = None
  # ....


  return features


# ******************************************************************************
# YOU DO NOT NEED TO CHANGE THIS FUNCTION TO READ DATA FILES
# ****************************************************************************** 
Example #18
Source File: optim.py    From glow with MIT License
def adam(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    beta2 = 1-1./(hps.train_its*hps.polyak_epochs)

    # all-reduce
    grads = [Z.allreduce_mean(g) for g in gs]

    t = tf.Variable(1., 'adam_t')
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates.append(t.assign_add(1))

    for w, g in zip(params, grads):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m2')
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m1')
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g
            updates.append(mom1.assign(mom1_new))
        else:
            mom1_new = g
        m2_new = beta2 * mom2 + (1. - beta2) * tf.square(g)
        delta_t = mom1_new / (tf.sqrt(m2_new) + epsilon)
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema 
Example #19
Source File: siamese_net.py    From atec-nlp with MIT License
def contrastive_loss(self, y, e):
        # margin and pos_weight can directly influence P and R metrics.
        l_1 = self._contrastive_loss_pos_weight * tf.pow(1-e, 2)
        l_0 = tf.square(tf.maximum(e-self._margin, 0))
        loss = tf.reduce_mean(y * l_1 + (1 - y) * l_0)
        return loss 
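With y = 1 the pair is penalized by (1 - e)^2, pushing the predicted similarity e toward 1; with y = 0 only similarities above the margin are penalized. A NumPy sketch with illustrative weight and margin values:

import numpy as np

pos_weight, margin = 1.0, 0.3
y = np.array([1.0, 0.0])      # 1 = similar pair, 0 = dissimilar pair
e = np.array([0.8, 0.6])      # predicted similarity scores
l_1 = pos_weight * (1 - e) ** 2
l_0 = np.maximum(e - margin, 0) ** 2
print(np.mean(y * l_1 + (1 - y) * l_0))  # 0.065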
Example #20
Source File: ops.py    From HyperGAN with MIT License
def gelu(self, x):
        return 0.5*x*(1+tf.nn.tanh(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x,3)))) 
Example #21
Source File: model.py    From rgn with MIT License
def _curriculum(config, step, loss_history, dependency_ops):
    """ Creates TF ops for maintaining and advancing the curriculum. """

    # assign appropriate curriculum increment value
    for case in switch(config['behavior']):
        if case('fixed_rate'):
            # fixed rate, always return same number
            increment = tf.constant(config['rate'], name='curriculum_increment')
        elif case('loss_threshold'):
            # return fixed increment if last loss is below threshold, zero otherwise
            increment_pred = tf.less(loss_history[-1], config['threshold'], name='curriculum_predicate')
            full_increment_func = lambda: tf.constant(config['rate'], name='full_curriculum_increment')
            zero_increment_func = lambda: tf.constant(0.0,            name='zero_curriculum_increment')
            increment = tf.cond(increment_pred, full_increment_func, zero_increment_func)
        elif case('loss_change'):
            # predicate for increment type
            increment_pred = tf.not_equal(loss_history[0], DUMMY_LOSS, name='curriculum_predicate')

            # increment function for when loss history is still
            def full_increment_func():
                lin_seq = tf.expand_dims(tf.linspace(0., 1., config['change_num_iterations']), 1)
                ls_matrix = tf.concat([tf.ones_like(lin_seq), lin_seq], 1)
                ls_rhs = tf.expand_dims(loss_history, 1)
                ls_slope = tf.matrix_solve_ls(ls_matrix, ls_rhs)[1, 0]

                full_increment = tf.div(config['rate'], tf.pow(tf.abs(ls_slope) + 1, config['sharpness']), name='full_curriculum_increment')

                return full_increment

            # dummy increment function for when loss history is changing rapidly
            zero_increment_func = lambda: tf.constant(0.0, name='zero_curriculum_increment')

            # final conditional increment
            increment = tf.cond(increment_pred, full_increment_func, zero_increment_func)

    # create updating op. the semantics are such that training / gradient update is first performed before the curriculum is incremented.
    with tf.control_dependencies(dependency_ops):
        update_op = tf.assign_add(step, increment, name='update_curriculum_op')

    return update_op 
Example #22
Source File: modeling.py    From bert-for-tf2 with MIT License
def gelu(x):
  """Gaussian Error Linear Unit.

  This is a smoother version of the ReLU.
  Original paper: https://arxiv.org/abs/1606.08415
  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the GELU activation applied.
  """
  cdf = 0.5 * (1.0 + tf.tanh(
      (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
  return x * cdf 
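This is the tanh approximation of GELU(x) = x * Phi(x), with Phi the standard normal CDF. A NumPy comparison against the exact form (uses scipy.stats.norm for Phi; illustrative):

import numpy as np
from scipy.stats import norm

x = np.linspace(-3, 3, 7)
approx = 0.5 * x * (1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x ** 3)))
exact = x * norm.cdf(x)                # GELU(x) = x * Phi(x)
print(np.max(np.abs(approx - exact)))  # on the order of 1e-3 or smaller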
Example #23
Source File: dop853.py    From astroNN with MIT License
def hinit(func, x, t, pos_neg, f0, iord, hmax, rtol, atol, args):
    """
    Estimate initial step size
    """
    sk = atol + rtol * tf.abs(x)
    dnf = tf.reduce_sum(tf.square(f0 / sk), axis=0)
    dny = tf.reduce_sum(tf.square(x / sk), axis=0)

    h = tf.sqrt(dny / dnf) * 0.01
    h = tf.reduce_min([h, tf.abs(hmax)])
    h = custom_sign(h, pos_neg)

    # perform an explicit Euler step
    xx1 = x + h * f0
    f1 = func(xx1, t[0] + h, *args)

    # estimate the second derivative of the solution
    der2 = tf.reduce_sum(tf.square((f1 - f0) / sk), axis=0)
    der2 = tf.sqrt(der2) / h

    # step size is computed such that h ** iord * max_d(norm(f0), norm(der2)) = 0.01
    der12 = tf.reduce_max([tf.abs(der2), tf.sqrt(dnf)])
    h1 = tf.pow(0.01 / der12, 1.0 / iord)

    h = tf.reduce_min([100.0 * tf.abs(h), tf.reduce_min([tf.abs(h1), tf.abs(hmax)])])

    return custom_sign(h, pos_neg), f0, f1, xx1 
Example #24
Source File: optim.py    From glow with MIT License
def adam2(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    beta2 = 1-1./(hps.train_its*hps.polyak_epochs)

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(g**2) for g in gs]

    t = tf.Variable(1., 'adam_t')
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates.append(t.assign_add(1))

    for w, g1, g2 in zip(params, grads1, grads2):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m2')
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m1')
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g1
            updates.append(mom1.assign(mom1_new))
        else:
            mom1_new = g1
        m2_new = beta2 * mom2 + (1. - beta2) * g2
        delta_t = mom1_new / (tf.sqrt(m2_new) + epsilon)
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema 
Example #25
Source File: optim.py    From glow with MIT License
def adam2_old(params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
    mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

    t = tf.Variable(1., 'adam_t')
    lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
    updates.append(t.assign_add(1))

    for p, g1, g2 in zip(params, grads1, grads2):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g1
            updates.append(v.assign(v_t))
        else:
            v_t = g1
        mg_t = mom2 * mg + (1. - mom2) * g2
        delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
        p_t = p - lr_t * delta_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    return tf.group(*updates) 
Example #26
Source File: model.py    From lm-human-preferences with MIT License
def gelu(x):
    with tf.name_scope('gelu'):
        return 0.5*x*(1+tf.tanh(np.sqrt(2/np.pi)*(x+0.044715*tf.pow(x, 3)))) 
Example #27
Source File: optim.py    From glow with MIT License
def adamax(params, cost_or_grads, alpha=3e-4, hps=None, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    beta2 = 1-1./(hps.train_its*hps.polyak_epochs)

    # all-reduce
    grads = [Z.allreduce_mean(g) for g in gs]

    t = tf.Variable(1., 'adam_t')
    alpha_t = alpha * tf.sqrt((1. - tf.pow(beta2, t))) / \
        (1. - tf.pow(hps.beta1, t))
    updates.append(t.assign_add(1))

    for w, g in zip(params, grads):
        mom2 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m2')
        if hps.beta1 > 0:
            mom1 = tf.Variable(tf.zeros(w.get_shape()), w.name + '_adam_m1')
            mom1_new = hps.beta1 * mom1 + (1. - hps.beta1) * g
            updates.append(mom1.assign(mom1_new))
        else:
            mom1_new = g
        m2_new = tf.maximum(beta2 * mom2, abs(g))
        delta_t = mom1_new / (m2_new + epsilon)
        w_new = hps.weight_decay * w - alpha_t * delta_t
        updates.append(mom2.assign(m2_new))
        updates.append(w.assign(w_new))

    # Polyak averaging
    polyak_avg_op, polyak_swap_op, ema = polyak(params, beta2)
    train_op = tf.group(polyak_avg_op, *updates)
    return train_op, polyak_swap_op, ema 
Example #28
Source File: losses.py    From MultiKE with MIT License
def orthogonal_loss(mapping, eye):
    loss = tf.reduce_sum(tf.reduce_sum(tf.pow(tf.matmul(mapping, mapping, transpose_b=True) - eye, 2), 1))
    return loss
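The penalty is the squared Frobenius norm of mapping . mapping^T - eye, which is zero exactly when the mapping has orthonormal rows. A NumPy check (illustrative):

import numpy as np

m = np.linalg.qr(np.random.randn(4, 4))[0]  # a random orthogonal matrix
print(np.sum((m @ m.T - np.eye(4)) ** 2))   # ~0, up to float error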